Analysis of Coarse Grained A2 Simulations¶

1) Notebook Setup¶

1.1) Import Required Modules¶

In [34]:
import MDAnalysis as mda
import MDAnalysis.analysis.msd as msd
import MDAnalysis.analysis.rdf
from MDAnalysis import transformations
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.lines as mlines
import pandas as pd
import scipy.stats
from scipy.signal import find_peaks
import math
import pandas as pd
import re
  
%matplotlib inline

1.2) Plot Parameters¶

In [35]:
# Reset parameters to their default state.
mpl.rcParams.update(mpl.rcParamsDefault)

# Font parameters.
mpl.rcParams["font.size"] = 20
mpl.rcParams["axes.labelsize"] = 30
mpl.rcParams["lines.linewidth"] = 2.3
mpl.rcParams["font.family"] = "Serif"

1.3) Analysis Parameters¶

In [36]:
# Load trajectory
u = mda.Universe('trajectory/image1.tpr', 'trajectory/reimaged1.xtc')

# Divisor used to track the progress of analysis
track_divisor = int(len(u.trajectory) / 10)
track_divisor 
Out[36]:
1500

2) Preparation¶

Collect some data that will be used throughout and define a few functions for facilitating analysis.

2.1) Get Time¶

Get and save the time so that it can be used during plotting.

2.1.1) Load Trajectory Collect, and Save Time¶

In [4]:
## Load trajectory
#u = mda.Universe('trajectory/image1.tpr', 'trajectory/reimaged1.xtc')
In [5]:
#ii = 0
#time = []
#for ts in u.trajectory:
#    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
#    time.append(u.trajectory.time)
#    ii += 1
#    
## Convert time units from ps to us
#time = np.array(time)
#time
In [6]:
#time = pd.DataFrame(time)
#time.to_csv("analysis/time/time.csv")

2.1.2) Quick Import After First Time¶

In [7]:
time = pd.read_csv("analysis/time/time.csv")
time = np.array(time["Time"])
time
Out[7]:
array([0.0000e+00, 1.0000e+03, 2.0000e+03, ..., 1.4998e+07, 1.4999e+07,
       1.5000e+07])

2.2) Function Definitions¶

2.2.1) Import All Trajectories¶

In [8]:
# A dictionary containing all of the trajectories is returned.
def get_trajectories():
    trajectories = {}
    for ii in range(1, 6):
        u = mda.Universe("trajectory/image" + str(ii) + ".tpr", 
                         "trajectory/reimaged" + str(ii) + ".xtc")
        trajectories["trajectory" + str(ii)] = u
    
    return(trajectories)

2.2.2) Iterate Analysis over Trajectories¶

In [9]:
# Generic iterator to perform a given analysis on each trajectory
# and return the results as a dictionary.
# Takes 
# (1) A function to perform the analysis.
# (2) The analysis dictionary.
def analyze_trajectories(analysis, trajectories):
    results = {}
    for traj in trajectories:
        results[traj] = analysis(trajectories[traj])
    return(results)

2.2.3) Iterate Plotting/Analysis Function Over Trajectory¶

In [10]:
# Generic iterator to perform a given analysis on each trajectory
# and return the results as a dictionary.
# Takes 
# (1) A function to perform the analysis.
# (2) The analysis dictionary.
def present_analysis(presentation, trajectories):
    for traj in trajectories:
        print("The following plots correcpond to:", traj)
        presentation(trajectories[traj])

3) Eccentricity¶

3.1) Necessary Functions¶

3.1.1) Calculate Moments of Inertia¶

In [13]:
# Provide an mdanalysis universe
def calculate_moi(u):
    # Create selections for groups that may be used for analysis
    bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                             "resname POP2 or resname CHOL")
    ii = 0 # Used to track calculation progress
    # Moment of inertia about each cartesian axis
    Ix = []
    Iy = []
    Iz = []
    # N x 3 mass matrix for faster multiplication
    mass = bilayer.positions
    mass[:, 0] = bilayer.masses
    mass[:, 1] = bilayer.masses
    mass[:, 2] = bilayer.masses
    # Iterate over all timesteps
    for ts in u.trajectory:
        # Caculate Ix, Iy, Iz
        bilayer_coordinates = bilayer.positions -      \
                              bilayer.center_of_mass()
        position_squared_times_mass = bilayer_coordinates * \
                                      bilayer_coordinates * \
                                      mass
        Ix.append(np.sum(position_squared_times_mass[:, 1] + \
                         position_squared_times_mass[:, 2]))
        Iy.append(np.sum(position_squared_times_mass[:, 0] + \
                         position_squared_times_mass[:, 2]))
        Iz.append(np.sum(position_squared_times_mass[:, 0] + \
                         position_squared_times_mass[:, 1]))
    
        # Check progress
        if ii % track_divisor == 0: 
            print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        ii += 1
    moi_results = [Ix, Iy, Iz]
    print(' ')
    return(moi_results)

3.1.2) Presenting Eccentricity Results¶

In [42]:
def present_moi_results(moi_data):
    # Collect data
    Ix, Iy, Iz = moi_data[0], moi_data[1], moi_data[2]
    I_all = np.array(Ix + Iy + Iz)
    I_min = np.min(I_all)
    I_avg = np.average(I_all)
    
    # Calculate eccentricity
    eccentricity = 1 - (I_min / I_avg)
    print("Imin is: %f" % I_min)
    print("Iavg is: %f" % I_avg)
    print("The eccentricity of the vesicle is: %f" % eccentricity)
    
    # Initialize plotting.
    fig, ax = plt.subplots()
    
    # Remove top and right spines from the figure.
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    
    # Adjust figure size.
    fig.set_size_inches(7.5, 4)
    
    # Plot data.
    ax.plot(time / 10**6, Ix, label = "Ix")
    ax.plot(time / 10**6, Iy, label = "Iy")
    ax.plot(time / 10**6, Iz, label = "Iz")
    
    ax.axhline(I_min, color = "red",linestyle = "--")
    #ax.annotate('$I_{avg}$', xy=(-0.8, 2.285e11), xytext=(-0.8, 2.285e11))
    
    ax.axhline(I_avg, color = "black",linestyle = "--")
    #ax.annotate('$I_{min}$', xy=(-0.8, 2.22e11), xytext=(-0.8, 2.22e11))
    
    # Set labels
    ax.set_xlabel("Time (µs)")
    ax.set_ylabel("MOI (amu Ų)")
    
    ax.legend(loc = "upper right")
    
    plt.show()

3.1.3) Combined Plot¶

In [43]:
def combined_moi_plot(moi_results):
    
    fig = plt.figure()
    # Adjust figure size.
    fig.set_size_inches(27, 21) 
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3) # 6 columns evenly divides both 2 & 3
    
    ax1 = fig.add_subplot(spec[0,0:2]) # row 0 with axes spanning 2 cols on evens
    ax2 = fig.add_subplot(spec[0,2:4])
    ax3 = fig.add_subplot(spec[1,0:2]) # row 0 with axes spanning 2 cols on odds
    ax4 = fig.add_subplot(spec[1,2:4])
    ax5 = fig.add_subplot(spec[2,1:3])
    axes = [ax1, ax2, ax3, ax4, ax5]

    for ii in range(0, 5):
        trajectory = "trajectory" + str(ii + 1)
        plot_data = moi_results[trajectory]
        
        # Collect data
        Ix, Iy, Iz = plot_data[0], plot_data[1], plot_data[2]
        I_all = np.array(Ix + Iy + Iz)
        I_min = np.min(I_all)
        I_avg = np.average(I_all)
        
        # Calculate eccentricity
        eccentricity = 1 - (I_min / I_avg)
        print("Imin is: %f" % I_min)
        print("Iavg is: %f" % I_avg)
        print("The eccentricity of the vesicle is: %f" % eccentricity)
    
        # Remove top and right spines from the figure.
        axes[ii].spines["top"].set_visible(False)
        axes[ii].spines["right"].set_visible(False)
    
        # Plot data.
        axes[ii].plot(time / 10**6, Ix, label = "Ix")
        axes[ii].plot(time / 10**6, Iy, label = "Iy")
        axes[ii].plot(time / 10**6, Iz, label = "Iz")
    
        axes[ii].axhline(I_min, color = "red",linestyle = "--")
        axes[ii].text(0., I_min + 150000000, '$I_{avg}$')
    
        axes[ii].axhline(I_avg, color = "black",linestyle = "--")
    
        # Set labels
        axes[ii].set_xlabel("Time (µs)")
        axes[ii].set_ylabel("MOI (amu Ų)")
        
        axes[ii].legend(loc = "upper right")

    plt.tight_layout()
    plt.show()

3.2) Re-Initialize Data and Perform Calculations¶

In [15]:
#trajectories = get_trajectories()
#moi_results = analyze_trajectories(calculate_moi, trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

3.3) Save Results¶

In [16]:
#for key in moi_results:
#    data = {}
#    data["Ix"] = moi_results[key][0]
#    data["Iy"] = moi_results[key][1]
#    data["Iz"] = moi_results[key][2]
#    data = pd.DataFrame.from_dict(data)
#    data.to_csv("analysis/eccentricity/" + key + ".csv", index = False)

3.4) Read Saved Results¶

In [41]:
prefix = "analysis/eccentricity/"
moi_results = {}
for ii in range(1, 6):
    data = pd.read_csv(prefix + "trajectory" + str(ii) + ".csv")
    moi_results["trajectory" + str(ii)] = [list(data.iloc[:, 0]),
                                           list(data.iloc[:, 1]),
                                           list(data.iloc[:, 2])]

3.5) Plot Moments of Inertia¶

In [3]:
#present_analysis(present_moi_results, moi_results)
In [45]:
combined_moi_plot(moi_results)
Imin is: 25105518893.873730
Iavg is: 26883445986.242313
The eccentricity of the vesicle is: 0.066135
Imin is: 25040613151.147152
Iavg is: 26800202812.448631
The eccentricity of the vesicle is: 0.065656
Imin is: 24477737596.515190
Iavg is: 26653219079.009644
The eccentricity of the vesicle is: 0.081622
Imin is: 24894967126.129841
Iavg is: 26731160513.143406
The eccentricity of the vesicle is: 0.068691
Imin is: 24453023185.326435
Iavg is: 26607182251.565849
The eccentricity of the vesicle is: 0.080962
No description has been provided for this image

3.6) Average Eccentricity¶

In [263]:
mean = np.mean([0.066135, 0.065656, 0.081622, 0.068691, 0.080962])
std = np.std([0.066135, 0.065656, 0.081622, 0.068691, 0.080962])
stderr_x2 = np.std([0.066135, 0.065656, 0.081622, 0.068691, 0.080962]) / np.sqrt(5.)
print("The mean eccentricity is:", mean, '±', stderr_x2)
The mean eccentricity is: 0.0726132 ± 0.003203837067018234

4) Radial Distribution for Membrane Dimensions¶

4.1) Necessary Functions¶

4.1.1) Universe Transformation to Switch an Atom's Coordinates with the System's COM¶

In [1]:
class replace_with_COM:
    """Replace special atom `atomname` in each fragment with COM of the fragment."""
    def __init__(self, bilayer, selection):
        self.bilayer = bilayer
        self.com_atoms = bilayer.select_atoms(selection)
        
    def get_com(self):
        return self.bilayer.center_of_mass()
    
    def __call__(self, ts):
        self.com_atoms.positions = np.array([list(self.get_com())])
        return ts

4.1.2) Radial Distribution Function Calculation¶

In [21]:
def calculate_rdf(u):
    ucom = mda.Universe(u.filename, u.trajectory.filename)
    # Create selections for groups that may be used for analysis
    selection = "(resname POPC and name PO4 NC3) or " + \
                "(resname POPS and name PO4 CNO) or " + \
                "(resname POP2 and name P1 P2 C1 C2 C3 PO4)"
    head_group = ucom.select_atoms(selection)
    bilayer = ucom.select_atoms("resname POPC or resname POPS or " + \
                                "resname POP2 or resname CHOL")
    selection = "resnum 1 and name C4A"
    ucom.trajectory.add_transformations(replace_with_COM(bilayer, selection))
    
    selection = "resnum 1 and name C4A"
    com_atoms = ucom.select_atoms(selection)
    
    rdf = mda.analysis.rdf.InterRDF(com_atoms, head_group, nbins = 1000, range = (65., 160.), verbose = True)
    rdf.run(step = 1)
    
    return(rdf.results)

4.1.3) Present RDF Results¶

In [187]:
def present_rdf_results(rdf_data):
    peaks, _ = find_peaks(rdf_data["rdf"], width = 40)
    
    rdf_min = rdf_data["rdf"][peaks[0]]
    rdf_max = rdf_data["rdf"][peaks[1]]
    dist_min = rdf_data["bins"][peaks[0]]
    dist_max = rdf_data["bins"][peaks[1]]
    dist_mid = (dist_min + dist_max) / 2
    thickness = dist_max - dist_min
    
    fig, ax = plt.subplots(figsize = (9.5, 4))
    mpl.rcParams['axes.facecolor'] = (1,1,1,0)
    
    ax.plot(rdf_data["bins"], rdf_data["rdf"])
    
    ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
    
    ax.set_xlabel("Distance from Vesicle Center (Ã…)")
    ax.set_ylabel("g(r)")
    
    ax.plot((dist_min, dist_min), (0., rdf_min),
            color = "tab:blue", linestyle = "--")
    ax.plot((dist_max, dist_max), (0., rdf_max),
            color = "tab:blue", linestyle = "--")
    ax.plot((dist_min, dist_max), (rdf_max, rdf_max), 
            color = "tab:blue", linestyle = "--")
    
    ax.text(dist_max + 1.2, rdf_max - 1, str(np.round(dist_max, 2)))
    ax.text(dist_min + 1.2, rdf_min - 1, str(np.round(dist_min, 2)))
    ax.text(dist_mid - 5, rdf_max + 0.5, str(np.round(thickness, 2)))
    
    plt.show()

4.1.4) Combined RDF Plot¶

In [85]:
def combined_rdf_plot(rdf_results):
    
    fig = plt.figure()
    # Adjust figure size.
    fig.set_size_inches(30, 21) 
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3) # 6 columns evenly divides both 2 & 3
    
    ax1 = fig.add_subplot(spec[0,0:2]) # row 0 with axes spanning 2 cols on evens
    ax2 = fig.add_subplot(spec[0,2:4])
    ax3 = fig.add_subplot(spec[1,0:2]) # row 0 with axes spanning 2 cols on odds
    ax4 = fig.add_subplot(spec[1,2:4])
    ax5 = fig.add_subplot(spec[2,1:3])
    axes = [ax1, ax2, ax3, ax4, ax5]

    for ii in range(0, 5):
        
        trajectory = "trajectory" + str(ii + 1)
        plot_data = rdf_results[trajectory]
    
        peaks, _ = find_peaks(plot_data["rdf"], width = 40)
        
        rdf_min = plot_data["rdf"][peaks[0]]
        rdf_max = plot_data["rdf"][peaks[1]]
        dist_min = plot_data["bins"][peaks[0]]
        dist_max = plot_data["bins"][peaks[1]]
        dist_mid = (dist_min + dist_max) / 2
        thickness = dist_max - dist_min
        
        axes[ii].plot(plot_data["bins"], plot_data["rdf"])
        
        axes[ii].yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
        
        axes[ii].set_xlabel("Distance from Vesicle Center (Ã…)")
        axes[ii].set_ylabel("g(r)")
        
        axes[ii].plot((dist_min, dist_min), (0., rdf_min),
                color = "tab:blue", linestyle = "--")
        axes[ii].plot((dist_max, dist_max), (0., rdf_max),
                color = "tab:blue", linestyle = "--")
        axes[ii].plot((dist_min, dist_max), (rdf_max, rdf_max), 
                color = "tab:blue", linestyle = "--")
        
        axes[ii].text(dist_max + 1.2, rdf_max - 1, str(np.round(dist_max, 2)))
        axes[ii].text(dist_min + 1.2, rdf_min - 1, str(np.round(dist_min, 2)))
        axes[ii].text(dist_mid - 5, rdf_max + 0.5, str(np.round(thickness, 2)))

    plt.tight_layout()
    plt.show()

4.2) Perform Analysis¶

In [23]:
#trajectories = get_trajectories()
#rdf_results = analyze_trajectories(calculate_rdf, trajectories)
  0%|          | 0/15001 [00:00<?, ?it/s]
  0%|          | 0/15001 [00:00<?, ?it/s]
  0%|          | 0/15001 [00:00<?, ?it/s]
  0%|          | 0/15001 [00:00<?, ?it/s]
  0%|          | 0/15001 [00:00<?, ?it/s]

4.3) Save Results¶

In [25]:
#for key in rdf_results:
#    data = {}
#    data["RDF"] = rdf_results[key]["rdf"]
#    data["Location"] = rdf_results[key]["bins"]
#    data = pd.DataFrame.from_dict(data)
#    data.to_csv("analysis/rdf/" + key + ".csv", index = False)

4.4) Read Saved Results¶

In [66]:
prefix = "analysis/rdf/vesi_dimensions/"
rdf_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    data = pd.read_csv(prefix + "trajectory" + str(ii) + ".csv")
    rdf_results[traj_name] = {}
    rdf_results[traj_name]["bins"] = list(data.iloc[:, 1])
    rdf_results[traj_name]["rdf"] = list(data.iloc[:, 0])

4.5) Present Results¶

In [2]:
#present_analysis(present_rdf_results, rdf_results)
In [87]:
combined_rdf_plot(rdf_results)
No description has been provided for this image

4.6) Average Vesicle Dimensions¶

Supplemental collection of data for usage in later analysis.

In [ ]:
upper_radii = [122.81, 122.52, 122.43, 122.14, 122.24]
lower_radii = [81.48, 81.10, 80.63, 81.29, 80.63]
thickness = [41.32, 41.42, 41.80, 40.85, 41.61]

mean_upper_rad = np.mean(upper_radii )
stderr_upper_rad = np.std(upper_radii ) / np.sqrt(5.)
mean_lower_rad = np.mean(lower_radii)
stderr_lower_rad = np.std(lower_radii ) / np.sqrt(5.)
mean_thickness = np.mean(thickness )
stderr_thickness = np.std(thickness) / np.sqrt(5.)
print("The mean upper radius is:", mean_upper_rad, '±', stderr_upper_rad * 2.)
print("The mean lower radius is:", mean_lower_rad, '±', stderr_lower_rad * 2.)
print("The mean thickness is:", mean_thickness, '±', stderr_thickness * 2.)

5) Average Protein Height¶

5.1) Necessary Functions¶

5.1.1) Average Outer Leaflet Radius by Frame¶

In [32]:
def outer_leaflet_height(u):
    # Create selections for groups that may be used for analysis
    selection = "(resname POPC and name PO4 NC3) or " + \
                "(resname POPS and name PO4 CNO) or " + \
                "(resname POP2 and name P1 P2 C1 C2 C3 PO4)"
    head_group = u.select_atoms(selection)
    # Create selections for groups that may be used for analysis
    bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                             "resname POP2 or resname CHOL")
    
    # Collect head group centers of mass.
    head_group_com = np.empty((int(u.trajectory.n_frames) + 1, head_group.n_residues, 3))
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        head_group.positions = head_group.positions - bilayer.center_of_mass()
        head_group_com[ii, :] = head_group.center_of_mass(compound = 'residues')
        ii += 1
    print(' ')
    
    # Get the magnitude of the COM vectors
    dist_from_center = np.sqrt(np.sum(head_group_com[0, :, :] * head_group_com[0, :, :], axis = 1))
    dist_from_center.sort()
    
    # Distribution should be bimodal with a health split between the distributions.
    # We can exploit this to determine at exactly what point the split occurs
    max_diff = np.max(dist_from_center[1:] - dist_from_center[0:-1])
    diff = dist_from_center[1:] - dist_from_center[0:-1]
    ii = 0
    for ii in range(len(diff)):
        if diff[ii] == max_diff:
            break
        ii += 1
    ii += 1
    lower_bound = ii
    
    # Use the determined lower-bound for the upper leaflet lipids to
    # generate a list for selecting the lipids of the upper leaflet
    upper_leaflet_min = dist_from_center[lower_bound]
    dist_from_center = np.sqrt(np.sum(head_group_com[0, :, :] * head_group_com[0, :, :], axis = 1))
    
    upper_selection = dist_from_center >= upper_leaflet_min
    upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
    
    # Define the upper-leaflet lipids
    upper_head_group_com = head_group_com[:, upper_selection, :]
    upper_leaf_radius = []
    
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
    
        radius = np.linalg.norm(upper_head_group_com[ii, :, :], axis = 1)
        radius = np.mean(radius)
        upper_leaf_radius.append(radius)
    
        ii += 1
    print(' ')
    
    upper_leaf_radius = np.array(upper_leaf_radius)
    
    return(upper_leaf_radius)

5.1.2) Calculate Average Protein Height¶

In [33]:
def average_protein_height(u):
    upper_leaf_radius = outer_leaflet_height(u)
    # Generate Protein ids and selections
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)"
    # Create selections for groups that may be used for analysis
    protein = u.select_atoms(selection)
    
    letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    letter_to_molid = dict(zip(letters, list(set(protein.molnums))))
    lines = []
    for let in letter_to_molid:
        lines.append("[ " + let + " ]" + '\n')
        sele_ids = protein.select_atoms("molnum " + str(letter_to_molid[let])).ids
        for ii in range(len(sele_ids) // 15):
            jj = ii * 15
            kk = (ii + 1) * 15
            lines.append(' '.join(list(sele_ids[jj:kk].astype(str))) + '\n')
        jj = kk
        kk = jj + len(sele_ids % 15)
        lines.append(' '.join(list(sele_ids[jj:kk].astype(str))) + '\n')
    #with open("analysis/distance/scripts/index.ndx", 'w') as f:
    #    for l in lines:
    #        f.writelines(l)
    
    #-------------------------------------------------------------------- 
    bilayer = u.select_atoms("resname POPC or " + \
                             "resname POPS or " + \
                             "resname POP2 or " + \
                             "resname CHOL")
    
    # Create selections for groups that may be used for analysis
    protein = u.select_atoms("(not resname POPC) and " + \
                             "(not resname POPS) and " + \
                             "(not resname POP2) and " + \
                             "(not resname CHOL)")
    #-------------------------------------------------------------------- 
    protein_com = np.empty((int(u.trajectory.n_frames) + 1, 10, 3))
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        protein.positions = protein.positions - bilayer.center_of_mass()
        protein_com[ii, :] = protein.center_of_mass(compound = 'molecules')
        ii += 1
    print(' ')
    #-------------------------------------------------------------------- 
    height = []
    
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        
        protein_height = np.linalg.norm(protein_com[ii, :, :], axis = 1)
        protein_height = protein_height
        protein_height = np.mean(protein_height) - upper_leaf_radius[ii]
        
        height.append(protein_height)
    
        ii += 1
    print(' ')
    height = np.array(height)
    
    return(height)

5.1.3) Present Mean A2 Height¶

In [34]:
def present_mean_height(height):
    # Initialize plotting.
    fig, ax = plt.subplots()
    
    # Remove top and right spines from the figure.
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    
    # Adjust figure size.
    fig.set_size_inches(15, 4)
    
    # Plot data.
    ax.plot(time / 10 ** 6, height)
    
    mean_height = np.mean(height[4001:])
    ax.axhline(mean_height, linestyle = "dashed", 
               color = "orange")
    
    mean_height_str = str(np.round(mean_height, 2))
    ax.text(0., mean_height - 4., mean_height_str)
    #ax.annotate(mean_height_str, xy = (-0.5, 15.), 
    #            xytext = (-0.5, 15.))
    
    # Set labels
    ax.set_xlabel("Time (µs)")
    ax.set_ylabel("Protein\nHeight (Ã…)")
    
    plt.show()

5.1.4) Combined Protein Height Plot¶

In [97]:
def combined_average_height_plot(height):
    fig = plt.figure()
    # Adjust figure size.
    fig.set_size_inches(30, 21) 
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3) # 6 columns evenly divides both 2 & 3
    
    ax1 = fig.add_subplot(spec[0,0:2]) # row 0 with axes spanning 2 cols on evens
    ax2 = fig.add_subplot(spec[0,2:4])
    ax3 = fig.add_subplot(spec[1,0:2]) # row 0 with axes spanning 2 cols on odds
    ax4 = fig.add_subplot(spec[1,2:4])
    ax5 = fig.add_subplot(spec[2,1:3])
    axes = [ax1, ax2, ax3, ax4, ax5]

    for ii in range(0, 5):
        
        trajectory = "trajectory" + str(ii + 1)
        plot_data = height[trajectory]
        
        # Remove top and right spines from the figure.
        axes[ii].spines["top"].set_visible(False)
        axes[ii].spines["right"].set_visible(False)
        
        # Plot data.
        axes[ii].plot(time / 10 ** 6, plot_data)
    
        mean_height = np.mean(plot_data[4001:])
        axes[ii].axhline(mean_height, linestyle = "dashed", 
                   color = "orange")
    
        mean_height_str = str(np.round(mean_height, 2))
        axes[ii].text(0., mean_height - 4., mean_height_str)
    
        # Set labels
        axes[ii].set_xlabel("Time (µs)")
        axes[ii].set_ylabel("Protein\nHeight (Ã…)")
    
    plt.tight_layout()
    plt.show()

5.2) Perform Analysis¶

In [35]:
#trajectories = get_trajectories()
#mean_height_results = analyze_trajectories(average_protein_height, 
#                                           trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

5.3) Save Results¶

In [38]:
#for key in mean_height_results:
#    data = {}
#    data["Mean Height"] = mean_height_results[key]
#    data = pd.DataFrame.from_dict(data)
#    data.to_csv("analysis/mean_height/" + key + ".csv", index = False)

5.4) Read Saved Results¶

In [90]:
prefix = "analysis/mean_height/"
mean_height_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    data = pd.read_csv(prefix + "trajectory" + str(ii) + ".csv")
    mean_height_results[traj_name] = list(data.iloc[:, 0])

5.4) Present Results¶

In [99]:
#present_analysis(present_mean_height, mean_height_results)
combined_average_height_plot(mean_height_results)
No description has been provided for this image

6) Individual Protein Heights¶

6.1) Necessary Functions¶

6.1.1) Individual Protein Height Calculation¶

In [42]:
def individual_protein_height(u):
    upper_leaf_radius = outer_leaflet_height(u)
    
    # Create selections for groups that may be used for analysis
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)"
    protein    = u.select_atoms(selection)
    # Create selections for groups that may be used for analysis
    selection = "resname POPC or resname POPS or " + \
                "resname POP2 or resname CHOL"
    bilayer = u.select_atoms(selection)
    
    #--------------------------------------------------------------------
    
    protein_com = np.empty((int(u.trajectory.n_frames) + 1, 10, 3))
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        protein.positions = protein.positions - bilayer.center_of_mass()
        protein_com[ii, :] = protein.center_of_mass(compound = 'molecules')
        ii += 1
    print(' ')
    #--------------------------------------------------------------------
    height = []
    
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        
        protein_height = np.linalg.norm(protein_com[ii, :, :], axis = 1)
        protein_height = protein_height - upper_leaf_radius[ii]
        height.append(protein_height)
    
        ii += 1
    print(' ')
    height = np.array(height)
    
    return(height)

6.1.2) Present Individual A2 Heights¶

In [123]:
def present_individual_height(height):
    np.seterr(all = "ignore")
    title = {0 : 'A', 1 : 'B', 2 : 'C', 3 : 'D', 4 : 'E', 
             5 : 'F', 6 : 'G', 7 : 'H', 8 : 'I', 9 : 'J'}
    
    fig, axs = plt.subplots(5, 2, figsize=(25, 30), sharey = True)
    kk = 0
    for jj in range(0, 5):
        for ii in range(0, 2):
            axs[jj, ii].plot(time / 10 ** 6, height[kk, :], label = "POPC")
            axs[jj, ii].set_title(title[kk])
            if ii == 0:
                axs[jj, ii].set_ylabel("Protein Height (Ã…)")
            if (kk == 8) or (kk == 9):
                axs[jj, ii].set_xlabel("Time (μs)")
            print(np.mean(height[kk, 4001:]))
            kk += 1
        
    plt.tight_layout()
    plt.show()

6.2) Collect Protein Center of Mass Data¶

In [4]:
#trajectories = get_trajectories()
#individual_height_results = analyze_trajectories(individual_protein_height, 
#                                                 trajectories)

6.3) Save Results¶

In [5]:
#for key in individual_height_results:
#    data = {}
#    for ii in range(0, 10):
#        a2_num = str(ii)
#        data[a2_num] = individual_height_results[key][:, ii]
#    data = pd.DataFrame.from_dict(data)
#    data.to_csv("analysis/individual_heights/" + key + ".csv", index = False)

6.4) Read Saved Results¶

In [101]:
prefix = "analysis/individual_heights/"
test_height_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    data = pd.read_csv(prefix + "trajectory" + str(ii) + ".csv")
    test_height_results[traj_name] = []
    for jj in range(0, 10):
        test_height_results[traj_name].append(list(data.iloc[:, jj]))
    test_height_results[traj_name] = np.array(test_height_results[traj_name])

6.5) Plot Results¶

In [124]:
present_analysis(present_individual_height, test_height_results)
The following plots correcpond to: trajectory1
16.36141268935273
13.263999098511714
14.673101744303988
16.024557948998932
24.104433819506582
17.929533177219565
16.89226795355231
20.019993026396328
13.122271222091253
41.85036369555732
No description has been provided for this image
The following plots correcpond to: trajectory2
20.81562603128449
16.02955003269995
13.917635309766927
14.040614098694506
22.29378603085873
15.3228433766458
40.09892490280148
20.339787894553815
23.33543307070982
23.31388812162696
No description has been provided for this image
The following plots correcpond to: trajectory3
18.754431424241652
12.46127924252203
12.881562900738954
16.383112860711385
19.607248381589514
25.583151491715647
12.885481718788904
15.04967971842962
13.740588216043191
14.287784206267867
No description has been provided for this image
The following plots correcpond to: trajectory4
44.322678684677456
12.007456283837552
13.204226866701088
20.06152663035018
14.41624750768596
14.55562913187548
19.619597389785287
12.757587773977136
22.867351389740207
20.686046604104686
No description has been provided for this image
The following plots correcpond to: trajectory5
18.503970654599453
12.081922713876983
19.091195351463504
20.767847493631717
18.94753524650009
14.97269797284841
12.528553157370691
22.00702322102428
14.67968328995768
17.588718550617287
No description has been provided for this image

7) A2-A2 Contact Analysis¶

7.1) Necessary Functions¶

7.1.1) Convert from 3 Letter to 1 Letter and Vice Versa¶

In [12]:
convert_aa = np.frompyfunc(mda.lib.util.convert_aa_code, 1, 1)

7.1.2) Count Contacts between A2¶

In [22]:
def protein_protein_contacts(u):
    # Create selections for groups that may be used for analysis
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)" 
    protein    = u.select_atoms(selection)
    # Create selections for groups that may be used for analysis
    selection = "resname POPC or resname POPS or " + \
                "resname POP2 or resname CHOL"
    bilayer = u.select_atoms(selection)
    
    first_a2 = np.min(protein.molnums)
    residue_lookup = protein.atoms[protein.molnums == first_a2].resnums - \
                     first_a2 + 30
    residue_lookup = convert_aa(protein.atoms[protein.molnums == first_a2].resnames) + \
                     residue_lookup.astype(str)
                     
    residue_lookup = list(residue_lookup)
    residue_lookup *= len(np.unique(protein.molnums))
    residue_lookup = np.array(residue_lookup)
    # Used to collect all of the information about each 
    # contact
    contacts = np.array([])
    # Progress tracker
    ii = 0
    for ts in u.trajectory[14000:]:
        # Calculate distances
        if ii % 100 == 0: 
            print(int(((ii / 100)) * 10), "% ", sep = '', end = '')
        search_results = mda.lib.distances.capped_distance(
                                           protein.atoms.positions, 
                                           protein.atoms.positions, 
                                           6.)
        
        contact_1_sele = list(search_results[0][:, 0])
        contact_2_sele = list(search_results[0][:, 1])
        distances      = list(search_results[1])
        
        # ID and replace contacts not from the same monomer
        np.unique((np.array(contact_1_sele) - np.array(contact_2_sele)) > 678, return_counts = True)
        mask = (np.array(contact_1_sele) - np.array(contact_2_sele)) > 678
        condition1 = contact_1_sele * mask != 0
        condition2 = contact_2_sele * mask != 0
        contact_1_sele = np.extract(condition1, contact_1_sele * mask)
        contact_2_sele = np.extract(condition2, contact_2_sele * mask)
        
        if len(distances) > 0:
            contacts = np.concatenate([contacts, residue_lookup[contact_1_sele], 
                                      residue_lookup[contact_2_sele]])
            
        ii +=1
    print('')    
    return(contacts)

7.1.3) Present a Contact Table and Generate a PDB with Percentages¶

In [35]:
def present_a2_a2_contacts(a2_a2_contact_results):
    # Combine all results and pool contact counts
    all_a2_a2_contacts = np.concatenate([a2_a2_contact_results[key] for key in a2_a2_contact_results])
    contacts_counts = np.unique(all_a2_a2_contacts, return_counts = True)
    contacts = contacts_counts[0]
    counts = contacts_counts[1] 
    
    # Collect information for making a table and pdb file
    contact_info = {}
    contact_info["Percentage"] = (counts  / np.sum(counts)) * 100.
    contact_info["Percentage"] = np.round(contact_info["Percentage"], 2).astype(str)
    contact_info["Resnum"] = np.array([ii.strip("ABCDEFGHIJKLMNOPQRSTUVWXYZ") for ii in contacts])
    contact_info["Resname"] = convert_aa(np.array([ii.strip("0123456789") for ii in contacts]))
    contact_info["One Letter"] = convert_aa(contact_info["Resname"])
    
    # Generate the table using pandas and present the top 20 entries
    residue    = contact_info["One Letter"] + \
                 (contact_info["Resnum"].astype(int) + 1).astype(str)
    percentage = contact_info["Percentage"].astype(float)
    
    contact_table = pd.DataFrame.from_dict({"Residue" : residue, 
                                            "Percentage" : percentage})
    contact_table = contact_table.sort_values("Percentage", 
                                              ascending = False, 
                                              ignore_index = True)
    print(contact_table.iloc[:20])
    
    # Generate a pdb file with the b-factors replaced by the % of A2-A2
    # contacts attributable to each residue
    with open("analysis/contact/a2_coarse.pdb", 'r') as f:
        contents = f.readlines()
    with open("analysis/contact/protein_contacts/contact.pdb", 'w') as f:
        for line in contents:
            for ii in range(len(contact_info["Resname"])):
                pattern = r'(^.................'
                pattern = pattern + contact_info["Resname"][ii]
                pattern = pattern + ((6 - len(contact_info["Resnum"][ii])) * '.')
                pattern = pattern + contact_info["Resnum"][ii]
                pattern = pattern + ('.' * 36) + ')' + '0.00' + '(.*)'
                replacement = r'\g<1>' + contact_info["Percentage"][ii] + '\g<2>'
                line = re.sub(pattern, replacement, line)
            f.writelines(line)

7.2) Perform Calculations¶

In [36]:
trajectories = get_trajectories()
a2_a2_contact_results = analyze_trajectories(protein_protein_contacts, 
                                             trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 

7.3) Present Table and Generate PDB¶

In [38]:
present_a2_a2_contacts(a2_a2_contact_results)
   Residue  Percentage
0     Y238        3.57
1     K206        2.21
2     P237        2.09
3     Y317        2.01
4     K204        1.95
5      K80        1.88
6     K246        1.84
7     K281        1.83
8      R78        1.79
9      R77        1.61
10     K49        1.53
11     F33        1.45
12    K324        1.38
13    K313        1.37
14    Y311        1.36
15     K47        1.30
16     T79        1.28
17     F73        1.23
18    K249        1.21
19    Q321        1.18

7.4) Visualize Results¶

Open the resulting PDB file and run the spectrum b command to color the system based on the fraction of contacts. To use the pretty viridis color palette instead of the default garbage use the following sequence of commands.
set_color color1, [68, 1, 84]
set_color color2, [49, 104, 142]
set_color color3, [53, 183, 121]
set_color color4, [253, 231, 37]
spectrum b, color1 color2 color3 color4

8) A2-Bilayer Contact Analysis¶

8.1) Necessary Functions¶

8.1.1) Count Contacts Between A2 and Membrane¶

In [24]:
def protein_membrane_contacts(u):
    # Create selections for groups that may be used for analysis
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)" 
    protein    = u.select_atoms(selection)
    # Create selections for groups that may be used for analysis
    selection = "resname POPC or resname POPS or " + \
                "resname POP2 or resname CHOL"
    bilayer = u.select_atoms(selection)
    
    # Generate list to lookup A2 residue number
    # Universe contains wrong residue numbers for each A2
    first_a2 = np.min(protein.molnums)
    residue_lookup = protein.atoms[protein.molnums == first_a2].resnums - \
                     first_a2 + 30
    residue_lookup = convert_aa(protein.atoms[protein.molnums == first_a2].resnames) + \
                     residue_lookup.astype(str)
                     
    residue_lookup = list(residue_lookup)
    residue_lookup *= len(np.unique(protein.molnums))
    residue_lookup = np.array(residue_lookup)
    
    # Used to collect all of the information about each 
    # contact
    protein_contacts = []
    lipid_contacts = []
    
    ii = 0
    for ts in u.trajectory[4000:]:
        if ii % 1100 == 0: 
            print(int(((ii / 1100)) * 10), "% ", sep = '', end = '')
        search_results = mda.lib.distances.capped_distance(protein.positions, 
                                                           bilayer.positions, 6.0)
        protein_sele = list(search_results[0][:, 0])
        lipid_sele = list(search_results[0][:, 1])
        distances = list(search_results[1])
        
        if len(distances) > 0:
            protein_contacts.append(residue_lookup[protein_sele])
            lipid_contacts.append(bilayer.atoms[lipid_sele].resnames)
        ii += 1
    print('')
    protein_contacts = np.concatenate(protein_contacts)
    return(protein_contacts)

8.1.2) Present A2-Membrane Contact Results¶

In [39]:
def present_a2_membrane_contacts(a2_mem_contact_results):
    # Combine all results and pool contact counts
    all_a2_mem_contacts = np.concatenate([a2_mem_contact_results[key] for key in a2_a2_contact_results])
    contacts_counts = np.unique(all_a2_mem_contacts, return_counts = True)
    contacts = contacts_counts[0]
    counts = contacts_counts[1] 
    
    # Collect information for making a table and pdb file
    contact_info = {}
    contact_info["Percentage"] = (counts  / np.sum(counts)) * 100.
    contact_info["Percentage"] = np.round(contact_info["Percentage"], 2).astype(str)
    contact_info["Resnum"] = np.array([ii.strip("ABCDEFGHIJKLMNOPQRSTUVWXYZ") for ii in contacts])
    contact_info["Resname"] = convert_aa(np.array([ii.strip("0123456789") for ii in contacts]))
    contact_info["One Letter"] = convert_aa(contact_info["Resname"])
    
    # Generate the table using pandas and present the top 20 entries
    residue    = contact_info["One Letter"] + \
                 (contact_info["Resnum"].astype(int) + 1).astype(str)
    percentage = contact_info["Percentage"].astype(float)
    
    contact_table = pd.DataFrame.from_dict({"Residue" : residue, 
                                            "Percentage" : percentage})
    contact_table = contact_table.sort_values("Percentage", 
                                              ascending = False, 
                                              ignore_index = True)
    print(contact_table.iloc[:20])
    
    # Generate a pdb file with the b-factors replaced by the % of A2-A2
    # contacts attributable to each residue
    with open("analysis/contact/a2_coarse.pdb", 'r') as f:
        contents = f.readlines()
    with open("analysis/contact/membrane_contacts/contact.pdb", 'w') as f:
        for line in contents:
            for ii in range(len(contact_info["Resname"])):
                pattern = r'(^.................'
                pattern = pattern + contact_info["Resname"][ii]
                pattern = pattern + ((6 - len(contact_info["Resnum"][ii])) * '.')
                pattern = pattern + contact_info["Resnum"][ii]
                pattern = pattern + ('.' * 36) + ')' + '0.00' + '(.*)'
                replacement = r'\g<1>' + contact_info["Percentage"][ii] + '\g<2>'
                line = re.sub(pattern, replacement, line)
            f.writelines(line)

8.2) Perform Analysis¶

In [25]:
trajectories = get_trajectories()
a2_mem_contact_results = analyze_trajectories(protein_membrane_contacts, 
                                              trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 

8.3) Present Table and Generate PDB¶

In [41]:
present_a2_membrane_contacts(a2_mem_contact_results)
   Residue  Percentage
0     K119        3.49
1     K206        3.45
2     K281        3.30
3      K49        3.24
4     K249        3.07
5     L121        2.95
6      K88        2.38
7     K246        2.35
8     Y151        2.34
9     K152        2.20
10     H94        2.15
11    R284        2.13
12    K324        1.98
13     S89        1.93
14     S92        1.88
15     K47        1.87
16    S161        1.79
17     K81        1.78
18    R245        1.78
19    K157        1.72
Residue       K119K206K281K49K249L121K88K246Y151K152H94R284K...
Percentage                                                47.78
dtype: object

8.4) Visualize Results¶

Open the resulting PDB file and run the spectrum b command to color the system based on the fraction of contacts.

9) Track Contacts Over Time¶

Plot fraction of contacts and total contacts for individual A2 with POPC, POPS, POP2, and CHOL over time.

9.1) Define Functions¶

9.1.1) Convert 1 to 3 Letter AA Code and Vice Versa¶

In [16]:
convert_aa = np.frompyfunc(mda.lib.util.convert_aa_code, 1, 1)
In [276]:
u = mda.Universe('trajectory/image1.tpr', 'trajectory/reimaged1.xtc')

u.trajectory[333]

# Create selections for groups that may be used for analysis
selection = "(not resname POPC) and " + \
            "(not resname POPS) and " + \
            "(not resname POP2) and " + \
            "(not resname CHOL)" 
protein    = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
selection = "resname POPC or resname POPS or " + \
            "resname POP2 or resname CHOL"
selection = "resname POP2"
bilayer = u.select_atoms(selection)
protein_molnums = list(set(protein.molnums))
protein_molnums.sort()
I_molnum = protein_molnums[-2]
I_molnum 
a2_sele = protein.molnums == I_molnum
search_results = mda.lib.distances.capped_distance(
                                   protein.atoms[a2_sele].positions, 
                                   bilayer.positions, 
                                   6.)
lipid_sele = list(search_results[0][:, 1])
lipid_num = bilayer.atoms.resnums[lipid_sele]
len(lipid_num)
#len(set(u.select_atoms("resname POP2").resnums))
Out[276]:
35
In [223]:
u = mda.Universe('trajectory/image1.tpr', 'trajectory/reimaged1.xtc')
selection = "(not resname POPC) and " + \
            "(not resname POPS) and " + \
            "(not resname POP2) and " + \
            "(not resname CHOL)" 
protein    = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
selection = "resname POPC or resname POPS or " + \
            "resname POP2 or resname CHOL"
bilayer = u.select_atoms(selection)

I_molnum = 
Out[223]:
<Universe with 58660 atoms>

9.1.2) Calculate the A2-Membrane Contact Counts¶

In [18]:
def lipid_contacts(u):
    # Create selections for groups that may be used for analysis
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)" 
    protein    = u.select_atoms(selection)
    # Create selections for groups that may be used for analysis
    selection = "resname POPC or resname POPS or " + \
                "resname POP2 or resname CHOL"
    bilayer = u.select_atoms(selection)
    #--------------------------------------------------------------------
    # Proteins to select for contact analysis
    #              A  C  D  E  F  G  I  J
    protein_molnums = list(set(protein.molnums))
    protein_molnums.sort()
    protein_codes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    protein_codes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    molnum_to_code = dict(zip(protein_molnums, protein_codes))
    #--------------------------------------------------------------------
    # Aggregate information for each A2
    aggregated_contact_info = {}
    for molnum in molnum_to_code:
        code = molnum_to_code[molnum]
        a2_sele = protein.molnums == molnum
        a2_contacts = {"POPC" : [],  "POPS" : [], "POP2" : [], 
                       "CHOL" : [], "Total" : []}
        ii = 0
        for ts in u.trajectory:
            if ii % track_divisor == 0: 
                print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
            # Calculate contacts
            search_results = mda.lib.distances.capped_distance(
                                               protein.atoms[a2_sele].positions, 
                                               bilayer.positions, 
                                               6.)
            
            lipid_sele = list(search_results[0][:, 1])
            
            lipid_name = bilayer.atoms.resnames[lipid_sele]
            a2_contacts["POPC"].append(np.count_nonzero(lipid_name == "POPC"))
            a2_contacts["POPS"].append(np.count_nonzero(lipid_name == "POPS"))
            a2_contacts["POP2"].append(np.count_nonzero(lipid_name == "POP2"))
            a2_contacts["CHOL"].append(np.count_nonzero(lipid_name == "CHOL"))
            ii += 1
        a2_contacts["POPC"] = np.float64(np.array(a2_contacts["POPC"]))
        a2_contacts["POPS"] = np.float64(np.array(a2_contacts["POPS"]))
        a2_contacts["POP2"] = np.float64(np.array(a2_contacts["POP2"]))
        a2_contacts["CHOL"] = np.float64(np.array(a2_contacts["CHOL"]))
        a2_contacts["Total"] = a2_contacts["POPC"] + a2_contacts["POPS"] + \
                               a2_contacts["POP2"] + a2_contacts["CHOL"] 
        a2_contacts["Total"] = np.float64(a2_contacts["Total"])
        aggregated_contact_info[code] = a2_contacts
        print('')
    return(aggregated_contact_info)

9.1.3) Present the Contact Counts for the First 6 us of the Trajectory¶

In [11]:
def present_early_mem_contacts(aggregated_contact_info):
    np.seterr(all = "ignore")
    title = {0 : 'A', 1 : 'B', 2 : 'C', 3 : 'D', 4 : 'E', 
             5 : 'F', 6 : 'G', 7 : 'H', 8 : 'I', 9 : 'J'}
    
    fig, axs = plt.subplots(5, 2, figsize=(25, 30), sharey = True)
    kk = 0
    for jj in range(0, 5):
        for ii in range(0, 2):
            num_popc = np.nan_to_num(aggregated_contact_info[kk]["POPC"])
            num_pops = np.nan_to_num(aggregated_contact_info[kk]["POPS"])
            num_pop2 = np.nan_to_num(aggregated_contact_info[kk]["POP2"])
            num_chol = np.nan_to_num(aggregated_contact_info[kk]["CHOL"])
            axs[jj, ii].plot(time[:6000] / (10 ** 6), num_popc[:6000], label = "POC")
            axs[jj, ii].plot(time[:6000] / (10 ** 6), num_pops[:6000], label = "POS")
            axs[jj, ii].plot(time[:6000] / (10 ** 6), num_pop2[:6000], label = "PIP2")
            axs[jj, ii].plot(time[:6000] / (10 ** 6), num_chol[:6000], label = "CHL")
            axs[jj, ii].set_title(title[kk])
            axs[jj, ii].set_ylim([-10, 325])
            
            if ii == 0:
                axs[jj, ii].set_ylabel("Contact Count")
            if (kk == 8) or (kk == 9):
                axs[jj, ii].set_xlabel("Time (μs)")
            kk += 1
        
    handles, labels = axs[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc = "outside center right", 
               bbox_to_anchor = (1.08, 0.5))
    plt.tight_layout()
    plt.show()

9.1.4) Present Contact Count for the Full Trajectory¶

In [12]:
def present_late_mem_contacts(aggregated_contact_info):
    np.seterr(all = "ignore")
    title = {0 : 'A', 1 : 'B', 2 : 'C', 3 : 'D', 4 : 'E', 
             5 : 'F', 6 : 'G', 7 : 'H', 8 : 'I', 9 : 'J'}
    
    fig, axs = plt.subplots(5, 2, figsize=(25, 30), sharey = True)
    kk = 0
    for jj in range(0, 5):
        for ii in range(0, 2):
            num_popc = np.nan_to_num(aggregated_contact_info[kk]["POPC"])
            num_pops = np.nan_to_num(aggregated_contact_info[kk]["POPS"])
            num_pop2 = np.nan_to_num(aggregated_contact_info[kk]["POP2"])
            num_chol = np.nan_to_num(aggregated_contact_info[kk]["CHOL"])
            axs[jj, ii].plot(time / (10 ** 6), num_popc, label = "POC")
            axs[jj, ii].plot(time / (10 ** 6), num_pops, label = "POS")
            axs[jj, ii].plot(time / (10 ** 6), num_pop2, label = "PIP2")
            axs[jj, ii].plot(time / (10 ** 6), num_chol, label = "CHL")
            axs[jj, ii].set_title(title[kk])
            axs[jj, ii].set_ylim([-10, 325])
            
            if ii == 0:
                axs[jj, ii].set_ylabel("Contact Count")
            if (kk == 8) or (kk == 9):
                axs[jj, ii].set_xlabel("Time (μs)")
            kk += 1
        
    handles, labels = axs[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc = "outside center right", 
               bbox_to_anchor = (1.08, 0.5))
    plt.tight_layout()
    plt.show()

9.1.5) Present the Percentage of Contacts¶

In [13]:
def present_fraction_mem_contacts(aggregated_contact_info):
    np.seterr(all = "ignore")
    title = {0 : 'A', 1 : 'B', 2 : 'C', 3 : 'D', 4 : 'E', 
             5 : 'F', 6 : 'G', 7 : 'H', 8 : 'I', 9 : 'J'}
    
    fig, axs = plt.subplots(5, 2, figsize=(25, 30), sharey = True)
    kk = 0
    for jj in range(0, 5):
        for ii in range(0, 2):
            num_popc = np.nan_to_num(aggregated_contact_info[kk]["POPC"])
            num_pops = np.nan_to_num(aggregated_contact_info[kk]["POPS"])
            num_pop2 = np.nan_to_num(aggregated_contact_info[kk]["POP2"])
            num_chol = np.nan_to_num(aggregated_contact_info[kk]["CHOL"])
            total_contacts = num_popc + num_pops + num_pop2 + num_chol
            frac_popc = (num_popc / total_contacts) * 100. 
            frac_pops = (num_pops / total_contacts) * 100. 
            frac_pop2 = (num_pop2 / total_contacts) * 100. 
            frac_chol = (num_chol / total_contacts) * 100. 
            axs[jj, ii].plot(time / (10 ** 6), frac_popc, label = "POC")
            axs[jj, ii].plot(time / (10 ** 6), frac_pops, label = "POS")
            axs[jj, ii].plot(time / (10 ** 6), frac_pop2, label = "PIP2")
            axs[jj, ii].plot(time / (10 ** 6), frac_chol, label = "CHL")
            axs[jj, ii].set_title(title[kk])
            
            axs[jj, ii].axhline(50., linestyle = "dashed", color = "black")
            axs[jj, ii].axhline(75., linestyle = "dashed", color = "black")
            axs[jj, ii].axhline(90., linestyle = "dashed", color = "black")
            
            if ii == 0:
                axs[jj, ii].set_ylabel("Contact Percentage")
            if (kk == 8) or (kk == 9):
                axs[jj, ii].set_xlabel("Time (μs)")
            kk += 1
        
    handles, labels = axs[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc = "outside center right", 
               bbox_to_anchor = (1.08, 0.5))
    plt.tight_layout()
    plt.show()

9.2) Perform Analysis¶

In [19]:
#trajectories = get_trajectories()
#lipid_contact_results = analyze_trajectories(lipid_contacts, 
#                                             trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 

9.3) Save Results¶

In [20]:
#for key in lipid_contact_results:
#    data = {}
#    for ii in range(0, 10):
#        data["POPC" + str(ii)] = lipid_contact_results[key][ii]["POPC"]
#        data["POPS" + str(ii)] = lipid_contact_results[key][ii]["POPS"]
#        data["POP2" + str(ii)] = lipid_contact_results[key][ii]["POP2"]
#        data["CHOL" + str(ii)] = lipid_contact_results[key][ii]["CHOL"]
#        data["Total" + str(ii)] = lipid_contact_results[key][ii]["Total"]
#    data = pd.DataFrame.from_dict(data)
#    data.to_csv("analysis/contact_count/" + key + ".csv", index = False)

9.4) Read Results¶

In [14]:
prefix = "analysis/contact_count/"
lipid_contact_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    lipid_contact_results[traj_name] = {}
    for jj in range(0, 10):
        data = pd.read_csv(prefix + "trajectory" + str(ii) + ".csv")
        lipid_contact_results[traj_name][jj] = {}
        lipid_contact_results[traj_name][jj]["POPC"] = np.array(list(data["POPC" + str(jj)]))
        lipid_contact_results[traj_name][jj]["POPS"] = np.array(list(data["POPS" + str(jj)]))
        lipid_contact_results[traj_name][jj]["POP2"] = np.array(list(data["POP2" + str(jj)]))
        lipid_contact_results[traj_name][jj]["CHOL"] = np.array(list(data["CHOL" + str(jj)]))
        lipid_contact_results[traj_name][jj]["Total"] = np.array(list(data["Total" + str(jj)]))

9.5) Present Results¶

9.5.1) Plot First 6us of Contacts¶

In [15]:
present_analysis(present_early_mem_contacts, lipid_contact_results)
The following plots correcpond to: trajectory1
No description has been provided for this image
The following plots correcpond to: trajectory2
No description has been provided for this image
The following plots correcpond to: trajectory3
No description has been provided for this image
The following plots correcpond to: trajectory4
No description has been provided for this image
The following plots correcpond to: trajectory5
No description has been provided for this image

9.5.2) Plot All Contacts¶

In [207]:
present_analysis(present_late_mem_contacts, lipid_contact_results)
The following plots correcpond to: trajectory1
No description has been provided for this image
The following plots correcpond to: trajectory2
No description has been provided for this image
The following plots correcpond to: trajectory3
No description has been provided for this image
The following plots correcpond to: trajectory4
No description has been provided for this image
The following plots correcpond to: trajectory5
No description has been provided for this image

9.5.3) Plot Percentage of Contacts¶

In [16]:
present_analysis(present_fraction_mem_contacts, lipid_contact_results)
The following plots correcpond to: trajectory1
No description has been provided for this image
The following plots correcpond to: trajectory2
No description has been provided for this image
The following plots correcpond to: trajectory3
No description has been provided for this image
The following plots correcpond to: trajectory4
No description has been provided for this image
The following plots correcpond to: trajectory5
No description has been provided for this image

10) Lipid Frechet Mean for Clustering¶

10.1) Necessary Functions¶

10.1.1) Calculate the Frechet Variance¶

In [134]:
def calculate_lipid_variance(u):
    def avg_pairwise_distance_squared(x):
        angles = np.sum(frame * x, axis = 1)
        angles = angles / \
                 (np.linalg.norm(x) * \
                  np.linalg.norm(frame, axis = 1))
        angles = np.arccos(angles)
        angles = np.nan_to_num(angles)
        arclengths = angles * average_upper_radius
        dist_squared = arclengths * arclengths 
        return(np.mean(dist_squared))
    frame_skip = 10
    average_upper_radius = 122.
    # Create selections for groups that may be used for analysis
    selection = "(resname POPC and name PO4 NC3) or "            + \
                "(resname POPS and name PO4 CNO) or "            + \
                "(resname POP2 and name P1 P2 C1 C2 C3 PO4) or " + \
                "(resname CHOL)" 
    head_group = u.select_atoms(selection)
    # Create selections for groups that may be used for analysis
    bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                             "resname POP2 or resname CHOL")
    
    lipid_head_com = {}
    #for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    #for lip in ["POPC"]:
    for lip in ["POPC", "POPS", "POP2", "CHOL"]:
        # Generate selection and np.array to store data.
        lipid_sele = head_group.select_atoms("resname " + lip)
        lipid_head_com[lip] = np.empty
        lipid_head_com[lip] = np.empty((int(u.trajectory.n_frames) + 1, 
                                  lipid_sele.n_residues, 
                                  3))
        ii = 0
        for ts in u.trajectory:
            # Check progress
            #if ii % track_divisor == 0: 
            if ii % 1500 == 0: 
                print(int(((ii / track_divisor) * 10)), "% ", sep = '', end = '')
            lipid_sele.positions  = lipid_sele.positions - bilayer.center_of_mass()
            lipid_head_com[lip][ii, :] = lipid_sele.center_of_mass(compound = 'residues')
            ii += 1
        print(' ')
        
    for lip in ["POPC", "POPS", "POP2", "CHOL"]:
        head_com = lipid_head_com[lip]
        
        # Get the magnitude of the COM vectors
        dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
        # Use the average magnitude to seperate the distributions
        upper_leaf_min = np.mean(dist_from_center)
        upper_selection = dist_from_center > upper_leaf_min
        dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
        upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
        
        # Define the upper-leaflet lipids
        lipid_head_com[lip] = head_com[:, upper_selection, :]
        
    variance_results = {}
    for lip in ["POPC", "POPS", "CHOL", "POP2"]:
        variance_results[lip] = []
        ii = 0
        
        for ts in u.trajectory:
            # Check progress
            #if ii % track_divisor == 0: 
            if ii % 1500 == 0: 
                print(int(((ii / track_divisor)) * 10), "% ", 
                      sep = '', end = '')
                
            frame = lipid_head_com[lip][ii, :, :]
            frame = frame / np.linalg.norm(frame, axis = 1)[:, None]
            frame = frame * average_upper_radius
            cons = ({'type': 'eq', 'fun': lambda x: np.sqrt((x[0] ** 2) + (x[1] ** 2) + (x[2] ** 2)) - average_upper_radius})
            optimization_results = []
            
            aur = average_upper_radius
            guesses = [[0, 0, aur], [0, 0, -aur],
                       [0, aur, 0], [0, -aur, 0],
                       [aur, 0, 0], [-aur, 0, 0]]
            for jj in range(len(guesses)):
                results = scipy.optimize.minimize(avg_pairwise_distance_squared, guesses[jj], constraints = cons)
                optimization_results.append(np.append(results['x'], results["fun"]))
            
            optimization_results = np.array(optimization_results)
            optimization_results[optimization_results[:, 3].argsort()]
            variance = optimization_results[0, 3]
            variance_results[lip].append(variance)
    
            ii += 1
        print(' ')
        
    return(variance_results)

10.1.2) Present the Frechet Variance Results¶

In [46]:
def present_lipid_variance(variance_results):
    # Initialize plotting.
    fig, ax = plt.subplots(figsize = (14, 7))
    
    # Remove top and right spines from the figure.
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    
    # Adjust figure size.
    fig.set_size_inches(15, 4)
    
    ax.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    
    # Plot data.
    ax.plot(time / 10 ** 6, variance_results["POPC"], label = "POC")
    ax.plot(time / 10 ** 6, variance_results["POPS"], label = "POS")
    ax.plot(time / 10 ** 6, variance_results["CHOL"], label = "CHL")
    ax.plot(time / 10 ** 6, variance_results["POP2"], label = "PIP2")
    
    ax.set_ylim([20000, 47500])
    
    ax.set_xlabel("Time (µs)")
    ax.set_ylabel("Variance (Ų)")
    
    ax.legend()
    
    plt.show()

10.1.3) Combined Lipid Variance Results¶

In [49]:
def present_combined_lipid_variance(lipid_variance_results):
    fig = plt.figure()
    # Adjust figure size.
    fig.set_size_inches(27, 21) 
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3) # 6 columns evenly divides both 2 & 3
    
    ax1 = fig.add_subplot(spec[0,0:2]) # row 0 with axes spanning 2 cols on evens
    ax2 = fig.add_subplot(spec[0,2:4])
    ax3 = fig.add_subplot(spec[1,0:2]) # row 0 with axes spanning 2 cols on odds
    ax4 = fig.add_subplot(spec[1,2:4])
    ax5 = fig.add_subplot(spec[2,1:3])
    axes = [ax1, ax2, ax3, ax4, ax5]
    
    for ii in range(0, 5):
        traj = "trajectory" + str(ii + 1)
        lip_var = lipid_variance_results[traj]
        #prot_var = protein_variance_results[traj]
    
        # Remove top and right spines from the figure.
        axes[ii].spines["top"].set_visible(False)
        axes[ii].spines["right"].set_visible(False)
    
        # Plot data.
        axes[ii].plot(time / 10 ** 6, lip_var["POPC"], label = "POC")
        axes[ii].plot(time / 10 ** 6, lip_var["POPS"], label = "POS")
        axes[ii].plot(time / 10 ** 6, lip_var["CHOL"], label = "CHL")
        axes[ii].plot(time / 10 ** 6, lip_var["POP2"], label = "PIP2")
    
        axes[ii].set_ylim([20000, 47500])
    
        axes[ii].set_xlabel("Time (µs)")
        axes[ii].set_ylabel("Variance (Ų)")
        
        axes[ii].legend()
    
    plt.tight_layout()
    plt.show()

10.2) Perform Analysis¶

In [6]:
#trajectories = get_trajectories()
#lipid_variance_results = analyze_trajectories(calculate_lipid_variance, 
#                                              trajectories)

10.3) Save Results¶

In [7]:
#for key in lipid_variance_results:
#    data = {}
#    data["POPC"] = lipid_variance_results[key]["POPC"]
#    data["POPS"] = lipid_variance_results[key]["POPS"]
#    data["POP2"] = lipid_variance_results[key]["POP2"]
#    data["CHOL"] = lipid_variance_results[key]["CHOL"]
#    data = pd.DataFrame.from_dict(data)
#    data.to_csv("analysis/variance/lipids/" + key + ".csv", 
#                index = False)

10.4) Read Results¶

In [19]:
prefix = "analysis/variance/lipids/"
lipid_variance_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    data = pd.read_csv(prefix + "trajectory" + str(ii) + ".csv")
    lipid_variance_results[traj_name] = {}
    lipid_variance_results[traj_name]["POPC"] = list(data.iloc[:, 0])
    lipid_variance_results[traj_name]["POPS"] = list(data.iloc[:, 1])
    lipid_variance_results[traj_name]["POP2"] = list(data.iloc[:, 2])
    lipid_variance_results[traj_name]["CHOL"] = list(data.iloc[:, 3])

10.5) Present Results¶

In [39]:
present_analysis(present_lipid_variance, lipid_variance_results)
The following plots correcpond to: trajectory1
No description has been provided for this image
The following plots correcpond to: trajectory2
No description has been provided for this image
The following plots correcpond to: trajectory3
No description has been provided for this image
The following plots correcpond to: trajectory4
No description has been provided for this image
The following plots correcpond to: trajectory5
No description has been provided for this image
In [50]:
present_combined_lipid_variance(lipid_variance_results)
No description has been provided for this image

11) Protein Frechet Mean for Clustering¶

11.1) Necessary Functions¶

11.1.1) Calculate the Frechet Variance¶

In [149]:
def calculate_protein_variance(u):
    average_upper_radius = 122.
    
    bilayer = u.select_atoms("resname POPC or " + \
                             "resname POPS or " + \
                             "resname POP2 or " + \
                             "resname CHOL")
    
    # Create selections for groups that may be used for analysis
    protein = u.select_atoms("(not resname POPC) and " + \
                             "(not resname POPS) and " + \
                             "(not resname POP2) and " + \
                             "(not resname CHOL)")
    #------------------------------------------
    #protein_com    = np.empty((protein.n_residues, int(u.trajectory.n_frames / 100), 3))
    protein_com    = np.empty((int(u.trajectory.n_frames) + 1, 10, 3))
    ii = 0
    for ts in u.trajectory:
        # Check progress
        #if ii % track_divisor == 0: 
        if ii % 1500 == 0: 
            print(int(((ii / track_divisor) * 10)), "% ", sep = '', end = '')
        protein.positions = protein.positions - bilayer.center_of_mass()
        protein_com[ii, :] = protein.center_of_mass(compound = 'molecules')
        ii += 1
    print(' ')
    #------------------------------------------
    def avg_pairwise_distance_squared(x):
        angles = np.sum(frame * x, axis = 1)
        angles = angles / \
                 (np.linalg.norm(x) * \
                  np.linalg.norm(frame, axis = 1))
        angles = np.arccos(angles)
        angles = np.nan_to_num(angles)
        arclengths = angles * average_upper_radius
        dist_squared = arclengths * arclengths 
        return(np.mean(dist_squared))
    
    protein_variance_results = []
    ii = 0
    for ts in u.trajectory:
        # Check progress
        #if ii % track_divisor == 0: 
        if ii % 1500 == 0: 
            print(int(((ii / track_divisor) * 10)), "% ", sep = '', end = '')
        
        frame = protein_com[ii, :, :]
        cons = ({'type': 'eq', 'fun': lambda x: np.sqrt((x[0] ** 2) + (x[1] ** 2) + (x[2] ** 2)) - average_upper_radius})
        optimization_results = []
        
        aur = average_upper_radius
        guesses = [[0, 0, aur], [0, 0, -aur],
                   [0, aur, 0], [0, -aur, 0],
                   [aur, 0, 0], [-aur, 0, 0]]
        for jj in range(len(guesses)):
            results = scipy.optimize.minimize(avg_pairwise_distance_squared, guesses[jj], constraints = cons)
            optimization_results.append(np.append(results['x'], results["fun"]))
        
        optimization_results = np.array(optimization_results)
        optimization_results[optimization_results[:, 3].argsort()]
        variance = optimization_results[0, 3]
        protein_variance_results.append(variance)
    
        ii += 1
    print(' ')
    return(protein_variance_results)

11.1.2) Present the Frechet Variance¶

In [160]:
def present_protein_variance(variance_results):
    # Initialize plotting.
    fig, ax = plt.subplots(figsize = (15, 4))
    
    # Remove top and right spines from the figure.
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    
    # Plot data.
    ax.plot(time, variance_results)
    
    ax.set_xlabel("Time (ns)")
    ax.set_ylabel("Variance (Ų)")
    
    ax.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    
    plt.show()

11.2) Perform Calculations¶

In [150]:
#trajectories = get_trajectories()
#protein_variance_results = analyze_trajectories(calculate_protein_variance, 
#                                                trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

11.3) Save Results¶

In [154]:
#for key in protein_variance_results:
#    data = {}
#    data["A2"] = protein_variance_results[key]
#    data = pd.DataFrame.from_dict(data)
#    data.to_csv("analysis/variance/protein/" + key + ".csv", 
#                index = False)

11.4) Read Results¶

In [27]:
prefix = "analysis/variance/protein/"
protein_variance_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    data = pd.read_csv(prefix + "trajectory" + str(ii) + ".csv")
    protein_variance_results[traj_name] = {}
    protein_variance_results[traj_name] = list(data.iloc[:, 0])

11.5) Present Results¶

In [162]:
present_analysis(present_protein_variance, protein_variance_results)
The following plots correcpond to: trajectory1
No description has been provided for this image
The following plots correcpond to: trajectory2
No description has been provided for this image
The following plots correcpond to: trajectory3
No description has been provided for this image
The following plots correcpond to: trajectory4
No description has been provided for this image
The following plots correcpond to: trajectory5
No description has been provided for this image

12) Lipid-Protein Variance Relationship¶

12.1) Overlain Variance vs Time Plots¶

In [31]:
for key in protein_variance_results:
    print("The following plots correspond to:", key)
    print(scipy.stats.pearsonr(protein_variance_results[key][:], 
                         lipid_variance_results[key]["POP2"][:]))
    
    # Initialize plotting.
    fig, ax1 = plt.subplots(figsize = (7, 5))
    
    # Remove top and right spines from the figure.
    ax1.spines["top"].set_visible(False)
    ax1.spines["right"].set_visible(False)
    
    color = 'C4'
    ax1.set_xlabel('Time (µs)')
    ax1.set_ylabel('A2 Variance (Ų)')
    ax1.tick_params(axis='y')
    ax1.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    
    plt1 = ax1.plot(time[:] / (10 ** 6), protein_variance_results[key][:], 
                    color = color, label = "A2")
    
    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
    
    color = 'C3'
    ax2.set_ylabel('PIP2 Variance (Ų)')
    ax2.tick_params(axis = 'y')
    ax2.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    
    plt2 = ax2.plot(time[:] / (10 ** 6), lipid_variance_results[key]["POP2"][:], 
                    color = color, label = "PIP2")
    
    # added these three lines
    lns = plt1 + plt2
    labels = [l.get_label() for l in lns]
    ax2.legend(lns, labels)
    
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.show()
The following plots correspond to: trajectory1
PearsonRResult(statistic=0.8098302446204506, pvalue=0.0)
No description has been provided for this image
The following plots correspond to: trajectory2
PearsonRResult(statistic=0.8910302251798473, pvalue=0.0)
No description has been provided for this image
The following plots correspond to: trajectory3
PearsonRResult(statistic=0.8000298892394614, pvalue=0.0)
No description has been provided for this image
The following plots correspond to: trajectory4
PearsonRResult(statistic=0.7429505201181323, pvalue=0.0)
No description has been provided for this image
The following plots correspond to: trajectory5
PearsonRResult(statistic=0.9361842846397979, pvalue=0.0)
No description has been provided for this image
In [30]:
fig = plt.figure()
# Adjust figure size.
fig.set_size_inches(27, 21) 
spec = mpl.gridspec.GridSpec(ncols=4, nrows=3) # 6 columns evenly divides both 2 & 3

ax1 = fig.add_subplot(spec[0,0:2]) # row 0 with axes spanning 2 cols on evens
ax2 = fig.add_subplot(spec[0,2:4])
ax3 = fig.add_subplot(spec[1,0:2]) # row 0 with axes spanning 2 cols on odds
ax4 = fig.add_subplot(spec[1,2:4])
ax5 = fig.add_subplot(spec[2,1:3])
axes = [ax1, ax2, ax3, ax4, ax5]

for ii in range(0, 5):
    key = "trajectory" + str(ii + 1)
    kk = ii + 5
    
    # Remove top and right spines from the figure.
    #axes[ii].spines["top"].set_visible(False)
    #axes[ii].spines["right"].set_visible(False)
    
    color = 'C4'
    axes[ii].set_xlabel('Time (µs)')
    axes[ii].set_ylabel('A2 Variance (Ų)')
    axes[ii].tick_params(axis='y')
    axes[ii].ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    
    plt1 = axes[ii].plot(time[:] / (10 ** 6), protein_variance_results[key][:], 
                         color = color, label = "A2")
    
    axes.append(axes[ii].twinx()) # instantiate a second axes that shares the same x-axis
    
    color = 'C3'
    axes[kk].set_ylabel('PIP2 Variance (Ų)')
    axes[kk].tick_params(axis = 'y')
    axes[kk].ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    
    plt2 = axes[kk].plot(time[:] / (10 ** 6), lipid_variance_results[key]["POP2"][:], 
                         color = color, label = "PIP2")
    
    # added these three lines
    lns = plt1 + plt2
    labels = [l.get_label() for l in lns]
    axes[kk].legend(lns, labels)
    
fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()
No description has been provided for this image

12.2) Lipid vs Protein Variance¶

In [32]:
for key in protein_variance_results:
    fig, ax = plt.subplots(figsize = (7, 5))
    
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    
    ax.scatter(protein_variance_results[key],
               lipid_variance_results[key]["POP2"])
    
    ax.set_xlabel("A2 Variance (Ų)")
    ax.set_ylabel("PIP2 Variance (Ų)")
    
    z = np.polyfit(protein_variance_results[key],
                   lipid_variance_results[key]["POP2"],
                   1)
    p = np.poly1d(z)
    
    ax.plot(protein_variance_results[key], 
            p(protein_variance_results[key]), ':',
            color = "orange")
    
    ax.ticklabel_format(style = "sci", axis = 'y', 
                       scilimits = (0, 0))
    ax.ticklabel_format(style = "sci", axis = 'x', 
                       scilimits = (0, 0))
    
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(protein_variance_results[key], 
                                                                         lipid_variance_results[key]["POP2"])
    print(r_value, p_value)
    
    plt.show()
0.8098302446204508 0.0
No description has been provided for this image
0.8910302251798478 0.0
No description has been provided for this image
0.800029889239461 0.0
No description has been provided for this image
0.7429505201181322 0.0
No description has been provided for this image
0.9361842846397981 0.0
No description has been provided for this image
In [33]:
#---------------------------------------------------------
fig = plt.figure()
# Adjust figure size.
fig.set_size_inches(14, 21) 
spec = mpl.gridspec.GridSpec(ncols=4, nrows=3) # 6 columns evenly divides both 2 & 3

ax1 = fig.add_subplot(spec[0,0:2]) # row 0 with axes spanning 2 cols on evens
ax2 = fig.add_subplot(spec[0,2:4])
ax3 = fig.add_subplot(spec[1,0:2]) # row 0 with axes spanning 2 cols on odds
ax4 = fig.add_subplot(spec[1,2:4])
ax5 = fig.add_subplot(spec[2,1:3])
axes = [ax1, ax2, ax3, ax4, ax5]

for ii in range(0, 5):
    key = "trajectory" + str(ii + 1)

    
    axes[ii].spines["top"].set_visible(False)
    axes[ii].spines["right"].set_visible(False)
    
    axes[ii].scatter(protein_variance_results[key],
               lipid_variance_results[key]["POP2"])
    
    axes[ii].set_xlabel("A2 Variance (Ų)")
    axes[ii].set_ylabel("PIP2 Variance (Ų)")
    
    z = np.polyfit(protein_variance_results[key],
                   lipid_variance_results[key]["POP2"],
                   1)
    p = np.poly1d(z)
    
    axes[ii].plot(protein_variance_results[key], 
            p(protein_variance_results[key]), ':',
            color = "orange")
    
    axes[ii].ticklabel_format(style = "sci", axis = 'y', 
                       scilimits = (0, 0))
    axes[ii].ticklabel_format(style = "sci", axis = 'x', 
                       scilimits = (0, 0))
    
fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()
##------------------------------------------------
#for key in protein_variance_results:
#    fig, ax = plt.subplots(figsize = (7, 5))
#    
#    ax.spines["top"].set_visible(False)
#    ax.spines["right"].set_visible(False)
#    
#    ax.scatter(protein_variance_results[key],
#               lipid_variance_results[key]["POP2"])
#    
#    ax.set_xlabel("A2 Variance (Ų)")
#    ax.set_ylabel("POP2 Variance (Ų)")
#    
#    z = np.polyfit(protein_variance_results[key],
#                   lipid_variance_results[key]["POP2"],
#                   1)
#    p = np.poly1d(z)
#    
#    ax.plot(protein_variance_results[key], 
#            p(protein_variance_results[key]), ':',
#            color = "orange")
#    
#    ax.ticklabel_format(style = "sci", axis = 'y', 
#                       scilimits = (0, 0))
#    ax.ticklabel_format(style = "sci", axis = 'x', 
#                       scilimits = (0, 0))
#    
#    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(protein_variance_results[key], 
#                                                                         lipid_variance_results[key]["POP2"])
#    
#    plt.show()
No description has been provided for this image

13) Radial Distribution for A2 Height from¶

13.1) Necessary Functions¶

13.1.1) Universe Transformation to Switch an Atom's Coordinates with the System's COM¶

In [192]:
class replace_with_COM:
    """Replace special atom `atomname` in each fragment with COM of the fragment."""
    def __init__(self, bilayer, selection):
        self.bilayer = bilayer
        self.com_atoms = bilayer.select_atoms(selection)
        
        # sanity check
        #print(self.com_atoms.positions.shape)
        #print(self.get_com().shape)
        #assert self.get_com().shape == self.com_atoms.positions.shape
        
    def get_com(self):
        return self.bilayer.center_of_mass()
    
    def __call__(self, ts):
        self.com_atoms.positions = np.array([list(self.get_com())])
        return ts

13.1.2) Calculate Combined RDF for A2 and the Vesicle¶

In [194]:
def calculate_combined_rdf(ucom):
    # Create selections for groups that may be used for analysis
    selection = "(resname POPC) or (resname POPS) or " + \
                "(resname POP2)"
    lipids = ucom.select_atoms(selection)
    selection = "(protein)"
    a2 = ucom.select_atoms(selection)
    bilayer = ucom.select_atoms("resname POPC or resname POPS or " + \
                                "resname POP2 or resname CHOL")
    selection = "resnum 1 and name C4A"
    ucom.trajectory.add_transformations(replace_with_COM(bilayer, selection))
    #------------------------------------------------------------------
    selection = "resnum 1 and name C4A"
    com_atoms = ucom.select_atoms(selection)
    
    rdf_lipids = mda.analysis.rdf.InterRDF(com_atoms, lipids, nbins = 1000, range = (65., 180.), verbose = True)
    rdf_lipids.run(start = 4000)
    
    rdf_a2 = mda.analysis.rdf.InterRDF(com_atoms, a2, nbins = 1000, range = (65., 180.), verbose = True)
    rdf_a2.run(start = 4000)
    #------------------------------------------------------------------
    rdf_lipids.results["rdf"][0] = 0.0
    rdf_a2.results["rdf"][0] = 0.0
    
    return([rdf_lipids, rdf_a2])

13.1.3) Present the Combined RDF¶

In [212]:
def present_coupled_rdf(combined_rdf_results):
    rdf_lipids = combined_rdf_results[0]
    rdf_a2 = combined_rdf_results[1]
    lipid_peaks, _ = find_peaks(rdf_lipids["rdf"], width = 20)
    a2_peaks, _ = find_peaks(rdf_a2["rdf"], width = 20)
    
    lower_leaf_rdf = rdf_lipids["rdf"][lipid_peaks[0]]
    lower_leaf_rad = rdf_lipids["bins"][lipid_peaks[0]]
    upper_leaf_rdf = rdf_lipids["rdf"][lipid_peaks[1]]
    upper_leaf_rad = rdf_lipids["bins"][lipid_peaks[1]]
    a2_rdf = rdf_a2["rdf"][a2_peaks[0]]
    a2_rad = rdf_a2["bins"][a2_peaks[0]]
    a2_vesi_dist = np.round(a2_rad - upper_leaf_rad, 2)
    
    fig, ax = plt.subplots(figsize = (17, 4))
    
    ax.plot(rdf_lipids["bins"], rdf_lipids["rdf"])
    ax.plot(rdf_a2["bins"], rdf_a2["rdf"])
    
    ax.set_xlabel("Distance from Vesicle Center (Ã…)")
    ax.set_ylabel("g(r)")
    
    ax.plot((lower_leaf_rad, lower_leaf_rad), (0., lower_leaf_rdf), 
            linestyle = "--", color = 'k')
    ax.plot((upper_leaf_rad, upper_leaf_rad), (0., upper_leaf_rdf), 
            linestyle = "--", color = 'k')
    ax.plot((a2_rad, a2_rad), (0., a2_rdf), linestyle = "--", color = 'k')
    ax.plot((upper_leaf_rad, a2_rad), (a2_rdf, a2_rdf), 
            linestyle = "--", color = 'k')
    ax.text(a2_rad - 17.2, a2_rdf + 0.25, str(a2_vesi_dist))
    
    # Legend lines
    vesicle_line = mlines.Line2D([], [], color = "tab:blue",   label = "Vesicle")
    a2_line = mlines.Line2D([], [], color = "tab:orange", label = "A2")
    
    ax.legend(handles = [vesicle_line, a2_line])
    
    plt.show()
In [215]:
def combined_coupled_rdf_plot(rdf_results):
    
    fig = plt.figure()
    # Adjust figure size.
    fig.set_size_inches(30, 21) 
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3) # 6 columns evenly divides both 2 & 3
    
    ax1 = fig.add_subplot(spec[0,0:2]) # row 0 with axes spanning 2 cols on evens
    ax2 = fig.add_subplot(spec[0,2:4])
    ax3 = fig.add_subplot(spec[1,0:2]) # row 0 with axes spanning 2 cols on odds
    ax4 = fig.add_subplot(spec[1,2:4])
    ax5 = fig.add_subplot(spec[2,1:3])
    axes = [ax1, ax2, ax3, ax4, ax5]

    for ii in range(0, 5):
        
        trajectory = "trajectory" + str(ii + 1)
        plot_data = rdf_results[trajectory]
        
        rdf_lipids = plot_data[0]
        rdf_a2 = plot_data[1]
        lipid_peaks, _ = find_peaks(rdf_lipids["rdf"], width = 20)
        a2_peaks, _ = find_peaks(rdf_a2["rdf"], width = 20)
        
        lower_leaf_rdf = rdf_lipids["rdf"][lipid_peaks[0]]
        lower_leaf_rad = rdf_lipids["bins"][lipid_peaks[0]]
        upper_leaf_rdf = rdf_lipids["rdf"][lipid_peaks[1]]
        upper_leaf_rad = rdf_lipids["bins"][lipid_peaks[1]]
        a2_rdf = rdf_a2["rdf"][a2_peaks[0]]
        a2_rad = rdf_a2["bins"][a2_peaks[0]]
        a2_vesi_dist = np.round(a2_rad - upper_leaf_rad, 2)
            
        axes[ii].plot(rdf_lipids["bins"], rdf_lipids["rdf"])
        axes[ii].plot(rdf_a2["bins"], rdf_a2["rdf"])
            
        axes[ii].set_xlabel("Distance from Vesicle Center (Ã…)")
        axes[ii].set_ylabel("g(r)")
        
        axes[ii].yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
            
            
        axes[ii].plot((lower_leaf_rad, lower_leaf_rad), (0., lower_leaf_rdf), 
                      linestyle = "--", color = 'k')
        axes[ii].plot((upper_leaf_rad, upper_leaf_rad), (0., upper_leaf_rdf), 
                      linestyle = "--", color = 'k')
        axes[ii].plot((a2_rad, a2_rad), (0., a2_rdf), linestyle = "--", color = 'k')
        axes[ii].plot((upper_leaf_rad, a2_rad), (a2_rdf, a2_rdf), 
                       linestyle = "--", color = 'k')
        axes[ii].text(a2_rad - 17.2, a2_rdf + 0.25, str(a2_vesi_dist))

    plt.tight_layout()
    plt.show()

13.2) Perform Calculations¶

In [195]:
trajectories = get_trajectories()
combined_rdf_results = analyze_trajectories(calculate_combined_rdf, 
                                            trajectories)
  0%|          | 0/11001 [00:00<?, ?it/s]
  0%|          | 0/11001 [00:00<?, ?it/s]
  0%|          | 0/11001 [00:00<?, ?it/s]
  0%|          | 0/11001 [00:00<?, ?it/s]
  0%|          | 0/11001 [00:00<?, ?it/s]
  0%|          | 0/11001 [00:00<?, ?it/s]
  0%|          | 0/11001 [00:00<?, ?it/s]
  0%|          | 0/11001 [00:00<?, ?it/s]
  0%|          | 0/11001 [00:00<?, ?it/s]
  0%|          | 0/11001 [00:00<?, ?it/s]

13.3) Save Results¶

In [204]:
for key in combined_rdf_results:
    data = {}
    data["RDF_lipids"] = combined_rdf_results[key][0].results.rdf
    data["RDF_a2"] = combined_rdf_results[key][1].results.rdf
    data["Location"] = combined_rdf_results[key][0].results.bins
    data = pd.DataFrame.from_dict(data)
    data.to_csv("analysis/rdf/combined/" + key + ".csv", index = False)

13.3) Read Results¶

In [213]:
prefix = "analysis/rdf/combined/"
combined_rdf_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    combined_rdf_results[traj_name] = []
    combined_rdf_results[traj_name].append({})
    combined_rdf_results[traj_name].append({})
    
    data = pd.read_csv(prefix + "trajectory" + str(ii) + ".csv")
    
    combined_rdf_results[traj_name][0]["bins"] = list(data.iloc[:, 2])
    combined_rdf_results[traj_name][0]["rdf"] = list(data.iloc[:, 0])
    combined_rdf_results[traj_name][1]["bins"] = list(data.iloc[:, 2])
    combined_rdf_results[traj_name][1]["rdf"] = list(data.iloc[:, 1])

13.4) Present Results¶

In [216]:
combined_coupled_rdf_plot(combined_rdf_results)
No description has been provided for this image
In [214]:
present_analysis(present_coupled_rdf, combined_rdf_results)
The following plots correcpond to: trajectory1
No description has been provided for this image
The following plots correcpond to: trajectory2
No description has been provided for this image
The following plots correcpond to: trajectory3
No description has been provided for this image
The following plots correcpond to: trajectory4
No description has been provided for this image
The following plots correcpond to: trajectory5
No description has been provided for this image

13.5) Average Height¶

In [265]:
mean = np.mean([27.26, 26.68, 26.45, 25.42, 26.57])
std = np.std([27.26, 26.68, 26.45, 25.42, 26.57])
stderr_x2 = np.std([27.26, 26.68, 26.45, 25.42, 26.57]) / np.sqrt(5.)
print("The mean height is:", mean, '±', stderr_x2)
The mean height is: 26.476 ± 0.26690222929005286

Overflow¶

These didn't make it into the final manuscript but there were some fun ideas in here. I came up with the lipid entropy idea after taking a machine learning course. I thought the protein orientation one was also interesting but propably better applied to another system.

4) Estimating the Vesicle Radius¶

4.1) Reinitialize the Data¶

In [12]:
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')


# Create selections for groups that may be used for analysis
selection = "(resname POPC and name PO4 NC3) or " + \
            "(resname POPS and name PO4 CNO) or " + \
            "(resname POP2 and name P1 P2 C1 C2 C3 PO4)"
head_group = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                         "resname POP2 or resname CHOL")

4.2) Collect Center of Mass for Each Head Group¶

In [13]:
head_group_com = np.empty((int(u.trajectory.n_frames / 100) + 1, head_group.n_residues, 3))
ii = 0
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
    head_group.positions = head_group.positions - bilayer.center_of_mass()
    head_group_com[ii, :] = head_group.center_of_mass(compound = 'residues')
    ii += 1
print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

4.3) Split Data into Upper and Lower Leaflets¶

In [14]:
# Get the magnitude of the COM vectors
dist_from_center = np.sqrt(np.sum(head_group_com[0, :, :] * head_group_com[0, :, :], axis = 1))
dist_from_center.sort()

# Distribution should be bimodal with a health split between the distributions.
# We can exploit this to determine at exactly what point the split occurs
max_diff = np.max(dist_from_center[1:] - dist_from_center[0:-1])
diff = dist_from_center[1:] - dist_from_center[0:-1]
ii = 0
for ii in range(len(diff)):
    if diff[ii] == max_diff:
        break
    ii += 1
ii += 1
lower_bound = ii

# Use the determined lower-bound for the upper leaflet lipids to
# generate a list for selecting the lipids of the upper leaflet
upper_leaflet_min = dist_from_center[lower_bound]
dist_from_center = np.sqrt(np.sum(head_group_com[0, :, :] * head_group_com[0, :, :], axis = 1))

upper_selection = dist_from_center >= upper_leaflet_min
upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]

lower_selection = dist_from_center < upper_leaflet_min
lower_selection = [ii for ii in range(len(lower_selection)) if lower_selection[ii]]

# Define the upper-leaflet lipids
upper_head_group_com = head_group_com[:, upper_selection, :]
lower_head_group_com = head_group_com[:, lower_selection, :]
In [15]:
# Initialize plotting.
fig, ax = plt.subplots()

# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Adjust figure size.
#fig.set_size_inches(15, 4)

# Plot data.
ax.hist(dist_from_center)

# Set labels
ax.set_xlabel("Distance from Center (Ã…)")
ax.set_ylabel("Count")

plt.show()
No description has been provided for this image

4.4) Calculate Average Radius Per Frame¶

In [16]:
upper_leaf_radius = []
lower_leaf_radius = []

ii = 0
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')

    radius = np.linalg.norm(upper_head_group_com[ii, :, :], axis = 1)
    radius = np.mean(radius)
    upper_leaf_radius.append(radius)

    radius = np.linalg.norm(lower_head_group_com[ii, :, :], axis = 1)
    radius = np.mean(radius)
    lower_leaf_radius.append(radius)
    
    ii += 1
print(' ')

lower_leaf_radius = np.array(lower_leaf_radius)
upper_leaf_radius = np.array(upper_leaf_radius)
vesicle_thickness = upper_leaf_radius - lower_leaf_radius

average_lower_radius = np.mean(lower_leaf_radius)
average_upper_radius = np.mean(upper_leaf_radius )
average_thickness = np.mean(vesicle_thickness)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

4.5) Average Outer Radius vs Time¶

In [17]:
plot_data  = [upper_leaf_radius, lower_leaf_radius, vesicle_thickness]
plot_title = ["Upper Leaflet Radius", "Lower Leaflet Radius", 
              "Vesicle Thickness"]
plot_value = [round(average_upper_radius, 2), 
              round(average_lower_radius, 2), 
              round(average_thickness, 2)]
plot_coord = [(-0.3, 197.75), (-0.3, 157.55), (-0.3, 40.225)]



# Initialize plotting.
fig, axs = plt.subplots(3, 1, figsize=(25, 20))

for ii in range(len(plot_data)):
    # Remove top and right spines from the figure.
    axs[ii].spines["top"].set_visible(False)
    axs[ii].spines["right"].set_visible(False)
    axs[ii].plot(time, plot_data[ii])
    axs[ii].axhline(plot_value[ii], color = "blue",linestyle = "--")
    axs[ii].annotate(str(plot_value[ii]), xy = plot_coord[ii], 
                     xytext = plot_coord[ii])
    axs[ii].set_title(plot_title[ii])
    
    if ii == 1:
        axs[ii].set_ylabel("Avg Distance (Ã…)")
axs[ii].set_xlabel("Time (μs)")


plt.show()

print(average_upper_radius)
No description has been provided for this image
197.7944349368595

6) Lipid Pairwise Arc Length¶

6.1) Reinitialize the Data¶

In [22]:
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')


# Create selections for groups that may be used for analysis
selection = "(resname POPC and name PO4 NC3) or "            + \
            "(resname POPS and name PO4 CNO) or "            + \
            "(resname POP2 and name P1 P2 C1 C2 C3 PO4) or " + \
            "(resname CHOL)" 
head_group = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                         "resname POP2 or resname CHOL")

6.2) Collect Lipid Center of Mass Data¶

In [23]:
lipid_head_com = {}
#for lip in ["POPC", "POPS", "POP2", "CHOL"]:
#for lip in ["POPC"]:
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    # Generate selection and np.array to store data.
    lipid_sele = head_group.select_atoms("resname " + lip)
    lipid_head_com[lip] = np.empty
    lipid_head_com[lip] = np.empty((int(u.trajectory.n_frames / frame_skip) + 1, 
                              lipid_sele.n_residues, 
                              3))
    ii = 0
    for ts in u.trajectory[::frame_skip]:
        # Check progress
        if ii % track_divisor == 0: 
            print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        lipid_sele.positions  = lipid_sele.positions - bilayer.center_of_mass()
        lipid_head_com[lip][ii, :] = lipid_sele.center_of_mass(compound = 'residues')
        ii += 1
    print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

6.3) Get Upper Leaflet Head Group COMs¶

In [24]:
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    head_com = lipid_head_com[lip]
    
    # Get the magnitude of the COM vectors
    dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
    # Use the average magnitude to seperate the distributions
    upper_leaf_min = np.mean(dist_from_center)
    upper_selection = dist_from_center > upper_leaf_min
    dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
    upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
    
    # Define the upper-leaflet lipids
    lipid_head_com[lip] = head_com[:, upper_selection, :]

6.4) Function to Generate all Pairwise Combinations of Points¶

In [25]:
def numpy_pairwise_combinations(x):
    idx = np.stack(np.triu_indices(len(x), k=1), axis=-1)
    return x[idx]

6.5) Calculate the Average Pairwise Distance¶

In [26]:
# Used to store arc-length results for each lipid.
arc_length_results = {}
for lip in lipid_head_com:
    arc_length_results[lip] = []
    ii = 0
    for ts in u.trajectory[::1000]:
        # Check progress
        if ii % 21 == 0: 
            #print(int(((ii / track_divisor)) * 10), "% ", 
            print(int(((ii / 21)) * 10), "% ", 
                  sep = '', end = '')
    
        frame = lipid_head_com[lip][ii, :, :]
        combinations = numpy_pairwise_combinations(frame)
        combinations
        angles = np.sum(combinations[:, 0, :] * combinations[:, 1, :], axis = 1)
        angles = angles / \
                 (np.linalg.norm(combinations[:, 0, :], axis = 1) * \
                  np.linalg.norm(combinations[:, 1, :], axis = 1))
        angles = np.arccos(angles)
        arclengths = angles * average_upper_radius
        arc_length_results[lip].append(np.mean(arclengths))
        
        ii += 10
    print(' ')
0% 100% 200% 300% 400% 500% 600% 700% 800% 900% 1000% 1100%  
0% 100% 200% 300% 400% 500% 600% 700% 800% 900% 1000% 1100%  
0% 100% 200% 300% 400% 500% 600% 700% 800% 900% 1000% 1100%  
0% 100% 200% 300% 400% 500% 600% 700% 800% 900% 1000% 1100%  

6.6) Plot the Average Pairwise Distance¶

In [30]:
# Initialize plotting.
fig, axs = plt.subplots(4, 1, figsize = (15, 25))

ii = 0
for lip in arc_length_results:
    # Remove top and right spines from the figure.
    axs[ii].spines["top"].set_visible(False)
    axs[ii].spines["right"].set_visible(False)
    
    # Set y range
    axs[ii].set_ylim(300.0, 314.0)
    
    # Plot data.
    axs[ii].plot(time[::10], arc_length_results[lip])
    
    # Set labels
    if ii == 0:
        axs[ii].set_ylabel("Avg Pairwise Dist (Ã…)")
    
    # Calculate equation for trendline
    z = np.polyfit(time[::10], arc_length_results[lip], 1)
    p = np.poly1d(z)
    
    # Get r^2 of the fit and other statistics
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(time[::10], arc_length_results[lip])
    
    print(lip, slope)
    # Add r^2 value to the plot
    axs[ii].annotate(f"R² = {round(r_value, 2)}", xy = (0., 309.5), 
                xytext = (0., 309.5))
    
    # Add trendline to plot
    axs[ii].plot(time[::10], p(time[::10]), linestyle = "dashed", 
            color = "orange")

    ii += 1

#axs[ii].set_xlabel("Time (us)")

plt.show()
POPC -0.003604287106444494
POPS -0.003312059177950976
POP2 -0.2804137219387939
CHOL -0.003930015608562447
No description has been provided for this image
In [ ]:
 
In [ ]:
 

7) Lipid Entropy¶

7.1) Reinitialize Data¶

In [356]:
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')


# Create selections for groups that may be used for analysis
selection = "(resname POPC and name PO4 NC3) or "            + \
            "(resname POPS and name PO4 CNO) or "            + \
            "(resname POP2 and name P1 P2 C1 C2 C3 PO4) or " + \
            "(resname CHOL)" 
head_group = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                         "resname POP2 or resname CHOL")

7.2) Collect PIP2 Center of Mass Data¶

In [357]:
lipid_head_com = {}
#for lip in ["POPC", "POPS", "POP2", "CHOL"]:
#for lip in ["POPC"]:
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    # Generate selection and np.array to store data.
    lipid_sele = head_group.select_atoms("resname " + lip)
    lipid_head_com[lip] = np.empty
    lipid_head_com[lip] = np.empty((int(u.trajectory.n_frames / frame_skip) + 1, 
                              lipid_sele.n_residues, 
                              3))
    ii = 0
    for ts in u.trajectory[::frame_skip]:
        # Check progress
        if ii % track_divisor == 0: 
            print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        lipid_sele.positions  = lipid_sele.positions - bilayer.center_of_mass()
        lipid_head_com[lip][ii, :] = lipid_sele.center_of_mass(compound = 'residues')
        ii += 1
    print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

7.3) Split Lipids by Leaflet¶

In [358]:
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    head_com = lipid_head_com[lip]
    
    # Get the magnitude of the COM vectors
    dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
    # Use the average magnitude to seperate the distributions
    upper_leaf_min = np.mean(dist_from_center)
    upper_selection = dist_from_center > upper_leaf_min
    dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
    upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
    
    # Define the upper-leaflet lipids
    lipid_head_com[lip] = head_com[:, upper_selection, :]

7.4) Function to Evenly Distribute Points on a Sphere¶

In [359]:
def fibonacci_sphere(samples=1000):

    points = []
    phi = math.pi * (3. - math.sqrt(5.))  # golden angle in radians

    for i in range(samples):
        y = 1 - (i / float(samples - 1)) * 2  # y goes from 1 to -1
        radius = math.sqrt(1 - y * y)  # radius at y

        theta = phi * i  # golden angle increment

        x = math.cos(theta) * radius
        z = math.sin(theta) * radius

        points.append(np.array([x, y, z]))
    points = np.array(points)
    return points

7.5) Calculate the Entropy¶

In [360]:
from sklearn.neighbors import NearestNeighbors

vesicle_surface_area = 4 * np.pi * (average_upper_radius ** 2)
number_lipids = len(lipid_head_com["POPC"][0, :, :]) + \
                len(lipid_head_com["POPS"][0, :, :]) + \
                len(lipid_head_com["POP2"][0, :, :]) + \
                len(lipid_head_com["CHOL"][0, :, :])
area_per_lipid = vesicle_surface_area / number_lipids

entropy_results = {}
for lip in lipid_head_com:
    entropy_results[lip] = []
    
    number_lipids = len(lipid_head_com[lip][0, :, :])
    
    one_cluster_area = len(lipid_head_com[lip][0, :, :]) * \
                           area_per_lipid
    num_bins = int(np.round(vesicle_surface_area / \
                            one_cluster_area))
    print(lip, ': ', num_bins)
    
    uniform_sphere = fibonacci_sphere(samples = num_bins)
    groups = np.arange(0, len(uniform_sphere), 1)
    
    neigh = NearestNeighbors(n_neighbors = 1)
    neigh.fit(uniform_sphere, groups)
    
    ii = 0
    for ts in u.trajectory[::frame_skip]:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
    
        frame = lipid_head_com[lip][ii, :, :]
        frame_norm = np.linalg.norm(frame, axis = 1)
        frame = frame / frame_norm[:, None]
        frame
    
        dist, group = neigh.kneighbors(frame)
    
        unique, counts = np.unique(group, return_counts=True)
        counts = np.array(list(dict(zip(unique, counts)).values()))
        p = counts / number_lipids
        log_p = np.log(p)
        entropy = -1. * np.sum(p * log_p)
    
        entropy_results[lip].append(entropy)
    
        ii += 1
    print(' ')
POPC :  2
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
POPS :  5
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
POP2 :  12
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
CHOL :  10
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
In [361]:
# Initialize plotting.
fig, axs = plt.subplots(3, 1, figsize = (15, 25))

ii = 0
#for lip in entropy_results:
for lip in ["POPS", "POP2", "CHOL"]:
    # Remove top and right spines from the figure.
    axs[ii].spines["top"].set_visible(False)
    axs[ii].spines["right"].set_visible(False)
    
    # Set y range
    #axs[ii].set_ylim(310.0, 312.0)
    
    # Plot data.
    axs[ii].plot(time, entropy_results[lip])
    
    # Set labels
    if ii == 1:
        axs[ii].set_ylabel("Avg Pairwise Dist (Ã…)")
    
    # Calculate equation for trendline
    z = np.polyfit(time, entropy_results[lip], 1)
    p = np.poly1d(z)
    
    # Get r^2 of the fit and other statistics
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(time, entropy_results[lip])
    
    print(lip, slope, r_value ** 2)
    # Add r^2 value to the plot
    axs[ii].annotate(f"R² = {round(r_value, 2)}", xy = (0., 309.5), 
                xytext = (0., 309.5))
    
    # Add trendline to plot
    axs[ii].plot(time, p(time), linestyle = "dashed", 
            color = "orange")
    
    #if ii == 2:
        

    ii += 1

#axs[ii].set_xlabel("Time (us)")

plt.show()
POPS 1.2979219925472855e-05 0.0025050808607435648
POP2 -0.0019054516556743983 0.4351049078286086
CHOL 0.00018085970378770206 0.05351522879602239
No description has been provided for this image
No description has been provided for this image

7) Protein Pairwise Arc Length¶

7.1) Reinitialize the Data¶

In [28]:
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')

# Create selections for groups that may be used for analysis
selection = "(not resname POPC) and " + \
            "(not resname POPS) and " + \
            "(not resname POP2) and " + \
            "(not resname CHOL)"
protein    = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
selection = "resname POPC or resname POPS or " + \
            "resname POP2 or resname CHOL"
bilayer = u.select_atoms(selection)

7.2) Collect Protein Center of Mass Data¶

In [29]:
protein_com = np.empty((int(u.trajectory.n_frames / frame_skip) + 1, 10, 3))
ii = 0
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
    protein.positions = protein.positions - bilayer.center_of_mass()
    protein_com[ii, :] = protein.center_of_mass(compound = 'molecules')
    ii += 1
print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

7.3) Function to Generate all Pairwise Combinations of Points¶

In [30]:
def numpy_pairwise_combinations(x):
    idx = np.stack(np.triu_indices(len(x), k=1), axis=-1)
    return x[idx]

7.4) Calculate the Average Pairwise Distance¶

In [31]:
avg_arclength_protein = []

ii = 0
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')

    frame = protein_com[ii, :, :]
    combinations = numpy_pairwise_combinations(frame)
    combinations
    angles = np.sum(combinations[:, 0, :] * combinations[:, 1, :], axis = 1)
    angles = angles / (np.linalg.norm(combinations[:, 0, :], axis = 1) * np.linalg.norm(combinations[:, 1, :], axis = 1))
    angles = np.arccos(angles)
    arclengths = angles * average_upper_radius
    avg_arclength_protein.append(np.mean(arclengths))
    
    ii += 1
print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

7.5) Plot the Average Pairwise Distance¶

In [32]:
# Initialize plotting.
fig, ax = plt.subplots()

# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Adjust figure size.
fig.set_size_inches(15, 4)

# Plot data.
ax.plot(time[550:], avg_arclength_protein[550:])
#ax.set_xlim([5.0, 18.8])
#ax.set_ylim([290, 330])

# Set labels
ax.set_xlabel("Time (us)")
ax.set_ylabel("Avg Pairwise Dist (Ã…)")

# Calculate equation for trendline
z = np.polyfit(time[550:], avg_arclength_protein[550:], 1)
p = np.poly1d(z)

slope, intercept, r_value, p_value, std_err = \
    scipy.stats.linregress(time[550:], avg_arclength_protein[550:])

# Add trendline to plot
ax.plot(time, p(time), linestyle = "dashed",color = "orange")

ax.annotate("R² = " + str(round(r_value, 2)), xy = (5.6, 315.), 
            xytext = (5.6, 305.))

plt.show()
No description has been provided for this image
In [295]:
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(time[550:], avg_arclength_protein[550:])
print("The slope is: ", slope)
print("The r-squared value is: %f" % (r_value * r_value))
print("The p-value is: %f" % p_value)
The slope is:  -4.497303994825977
The r-squared value is: 0.751070
The p-value is: 0.000000

8) Protein-Lipid Pairwise Arc-Length Relationship¶

In [33]:
# Initialize plotting.
fig, ax = plt.subplots()

# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Adjust figure size.
fig.set_size_inches(9, 4)

# Plot data.
ax.scatter(avg_arclength_protein[550:], avg_arclength_pop2[550:])

# Set labels
ax.set_xlabel("Time (us)")
ax.set_ylabel("Avg Pairwise Dist (Ã…)")

# Calculate equation for trendline
z = np.polyfit(avg_arclength_protein[550:], avg_arclength_pop2[550:], 1)
p = np.poly1d(z)

# Add trendline to plot
ax.plot(avg_arclength_protein[550:], p(avg_arclength_protein[550:]), color = "orange")

plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Input In [33], in <cell line: 12>()
      9 fig.set_size_inches(9, 4)
     11 # Plot data.
---> 12 ax.scatter(avg_arclength_protein[550:], avg_arclength_pop2[550:])
     14 # Set labels
     15 ax.set_xlabel("Time (us)")

NameError: name 'avg_arclength_pop2' is not defined
In [ ]:
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(avg_arclength_protein[550:], avg_arclength_pop2[550:])
print("The slope is: ", slope)
print("The r-squared value is: %f" % (r_value * r_value))
print("The p-value is: %f" % p_value)

9) Lipid Diffusion Coefficients¶

Re-Initialize the Data¶

In [34]:
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')

# Create selections for groups that may be used for analysis
bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                         "resname POP2 or resname CHOL")

Collect Bilayer Coordinates¶

In [35]:
bilayer_coms = np.empty((int(u.trajectory.n_frames / frame_skip) + 1, bilayer.n_residues, 3))
ii = 0
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
    bilayer.positions  = bilayer.positions - bilayer.center_of_mass()
    bilayer_coms[ii, :] = bilayer.center_of_mass(compound = 'residues')
    ii += 1
print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
In [44]:
len(bilayer_coms)
Out[44]:
2393

Calculate MSD using Arc Length¶

In [70]:
# Used to collect MSD data
msd = {"POPC" : [], "POPS" : [], "POP2" : [], "CHOL" : [], "ALL" : []}

# Generate selection indicies for each of the individual lipids
popc_sele = bilayer.atoms[bilayer.atoms.resnames == "POPC"].resnums
popc_sele = np.unique(popc_sele - 1)
pops_sele = bilayer.atoms[bilayer.atoms.resnames == "POPS"].resnums
pops_sele = np.unique(pops_sele - 1)
pop2_sele = bilayer.atoms[bilayer.atoms.resnames == "POP2"].resnums
pop2_sele = np.unique(pop2_sele - 1)
chol_sele = bilayer.atoms[bilayer.atoms.resnames == "CHOL"].resnums
chol_sele = np.unique(chol_sele - 1)

# Progress tracker
ii = 0 
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')

    # Get info for distance calc
    ref_frame = bilayer_coms[1500, :, :]
    frame = bilayer_coms[ii, :, :]
    
    # Calculate arc length with reference to initial position
    angles = np.sum(ref_frame * frame, axis = 1)
    angles = angles / (np.linalg.norm(frame, axis = 1) * np.linalg.norm(ref_frame, axis = 1))
    angles = np.arccos(angles)
    
    arclengths = angles * average_upper_radius
    
    # Calculate the MSD for all lipids as specific lipids
    msd["ALL"].append(np.mean(arclengths * arclengths))
    msd["POPC"].append(np.mean(arclengths[popc_sele] * \
                               arclengths[popc_sele]))
    msd["POPS"].append(np.mean(arclengths[pops_sele] * \
                               arclengths[pops_sele]))
    msd["POP2"].append(np.mean(arclengths[pop2_sele] * \
                               arclengths[pop2_sele]))
    msd["CHOL"].append(np.mean(arclengths[chol_sele] * \
                               arclengths[chol_sele]))
    
    ii += 1
print(' ')
# Change the 'nan' msd calculation to 
msd["POPC"][0] = 0.
msd["POPS"][0] = 0.
msd["POP2"][0] = 0.
msd["CHOL"][0] = 0.
msd["ALL"][0] = 0.

# Convert lists to np arrays
msd["POPC"] = np.array(msd["POPC"])
msd["POPS"] = np.array(msd["POPS"])
msd["POP2"] = np.array(msd["POP2"])
msd["CHOL"] = np.array(msd["CHOL"])
msd["ALL"]  = np.array(msd["ALL"])
0% 10% 20% 30% 40% 50% 60% 
/tmp/ipykernel_151103/3090153956.py:27: RuntimeWarning: invalid value encountered in arccos
  angles = np.arccos(angles)
70% 80% 90% 100%  
In [50]:
fig, ax = plt.subplots()

#ax.set_xlim([10 ** 0,10 ** 6])
#ax.set_ylim([10 ** 0,10 ** 6])

ax.plot(time[1000:1100] * 1000., msd["POPC"][1000:1100], label = "POPC")
ax.plot(time[1000:1100] * 1000., msd["POPS"][1000:1100], label = "POPS")
ax.plot(time[1000:1100] * 1000., msd["POP2"][1000:1100], label = "POP2")
ax.plot(time[1000:1100] * 1000., msd["CHOL"][1000:1100], label = "CHOL")
ax.plot(time[1000:1100] * 1000., msd["ALL"][1000:1100], label = "ALL")

#ax.axvline(100., color = "red",linestyle = "--")
#ax.axvline(2500., color = "red",linestyle = "--")

ax.set_xlabel("Time (ns)")
ax.set_ylabel("Mean Squared\nDisplacement (Ų)")

ax.legend()
plt.show()
No description has been provided for this image

Check for Linear Region of Log Plot¶

In [47]:
fig, ax = plt.subplots()

ax.set_xlim([10 ** 0,10 ** 6])
ax.set_ylim([10 ** 0,10 ** 6])

ax.loglog(time[1:] * 1000., msd["POPC"][1:], label = "POPC")
ax.loglog(time[1:] * 1000., msd["POPS"][1:], label = "POPS")
ax.loglog(time[1:] * 1000., msd["POP2"][1:], label = "POP2")
ax.loglog(time[1:] * 1000., msd["CHOL"][1:], label = "CHOL")
ax.loglog(time[1:] * 1000., msd["ALL"][1:], label = "ALL")

ax.axvline(100., color = "red",linestyle = "--")
ax.axvline(2500., color = "red",linestyle = "--")

ax.set_xlabel("Time (ns)")
ax.set_ylabel("Mean Squared\nDisplacement (Ų)")

ax.legend()
plt.show()
No description has been provided for this image

Plot MSD with Trendline for Selected Region¶

In [67]:
#slope, intercept, r_value, p_value, std_err = \
#scipy.stats.linregress(time[lower_bound:upper_bound], 
#                       msd[current_key][lower_bound:upper_bound])
time[lower_bound:upper_bound]
msd[current_key][lower_bound:upper_bound]
Out[67]:
array([           nan,   280.06406534,   583.65668546,   816.04020584,
        1160.667803  ,  1443.29150359,  1771.9387194 ,  2127.31425819,
        2492.43716227,  2669.20407497,  2878.33354155,  3279.87212495,
        3532.89234283,  3967.59264187,  4192.77985047,  4289.01416178,
        4930.23360422,  5278.8645885 ,  5408.08195404,  5739.30960802,
        5985.18602883,  6579.54776716,  6930.68218801,  6881.98175111,
        6905.29461471,  7286.99399176,  7275.4894873 ,  7655.39546011,
        8100.35014169,  8517.80986959,  9229.22802286,  9507.88342198,
        9391.09243177,  9769.91707169,  9903.30571437, 10320.87419198,
       10497.44061686, 11127.85858315, 11691.93110884, 11631.31567832,
       12133.38798729, 11895.61031238, 12327.87113712, 11791.58101714,
       12049.87881323, 12162.35067832, 12407.73324873, 12637.00144672,
       12947.56091774, 12973.3899526 , 12935.06393963, 13078.72944584,
       13470.2450592 , 13551.60587073, 13499.61964139, 13739.79629186,
       14080.80555569, 14361.26167888, 14705.22881079, 14756.34082044,
       14963.49925294, 15338.59148305, 15568.45487487, 15660.22166779,
       15917.19097419, 16232.4183077 , 16663.5394359 , 16873.50587676,
       17132.84066933, 17679.38672097, 17987.86015596, 18436.56878276,
       18526.96330888, 18901.4453793 , 19465.50019425, 19982.09662542,
       20160.32426309, 20784.13187344, 21110.27005857, 21302.51357703,
       21682.53090061, 21818.69168877, 21839.27697413, 21891.78236872,
       21676.77867584, 22267.99183759, 22543.82595131, 22697.06126018,
       22395.73117373, 22546.9058549 , 22385.14086497, 22712.54989915,
       23016.30378819, 23299.67508273, 23699.94374579, 23708.86942405,
       24261.99146611, 24340.1150816 , 24610.5859035 , 24898.46218483])
In [71]:
# Determine index for selection of data
#lower_sele = (time * 1000.) > 100
#upper_sele = (time * 1000.) < 10 ** 6
#
#lower_sele = [ii for ii in range(len(selection)) if lower_sele[ii]]
#upper_sele = [ii for ii in range(len(selection)) if upper_sele[ii]]
#lower_bound = np.min(lower_sele)
#upper_bound = np.max(upper_sele)
lower_bound = 1501
upper_bound = 1600

msd_keys = ["POPC", "POPS", "POP2", "CHOL", "ALL"]
plot_colors = ["tab:blue", "tab:orange", "tab:red", 
               "tab:green", "tab:purple"]

# Initialize plotting.
fig, ax = plt.subplots(figsize = (15, 5))

for ii in range(len(msd_keys)):
    current_key = msd_keys[ii]

    # Plot data.
    ax.plot(time[lower_bound:upper_bound], 
            msd[current_key][lower_bound:upper_bound],
            label = current_key, color = plot_colors[ii])
    
    # Calculate equation for trendline
    z = np.polyfit(time[lower_bound:upper_bound], 
                   msd[current_key][lower_bound:upper_bound], 1)
    p = np.poly1d(z)

    # Add trendline to plot
    ax.plot(time[lower_bound:upper_bound], 
            p(time[lower_bound:upper_bound]), 
            label = current_key, color = plot_colors[ii],
            linestyle = "dashed")
    
    slope, intercept, r_value, p_value, std_err = \
                scipy.stats.linregress(time[lower_bound:upper_bound], 
                                       msd[current_key][lower_bound:upper_bound])
    print("The " + current_key + \
          " diffusion coefficient is: %f" % (slope / 6.))

# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Set labels
ax.set_xlabel("Time (μs)")
ax.set_ylabel("MSD (Ų)")

# Legend lines
popc_line = mlines.Line2D([], [], color = "tab:blue",   label = "POPC")
pops_line = mlines.Line2D([], [], color = "tab:orange", label = "POPS")
pop2_line = mlines.Line2D([], [], color = "tab:red",    label = "POP2")
chol_line = mlines.Line2D([], [], color = "tab:green",  label = "CHOL")
all_line  = mlines.Line2D([], [], color = "tab:purple", label = "ALL")

ax.legend(handles = [popc_line, pops_line, pop2_line, 
                     chol_line, all_line])

plt.show()
The POPC diffusion coefficient is: 4049.415038
The POPS diffusion coefficient is: 3948.195776
The POP2 diffusion coefficient is: 3053.942487
The CHOL diffusion coefficient is: 5204.770923
The ALL diffusion coefficient is: 4088.231257
No description has been provided for this image
In [26]:
print("The slope is: %f" % slope)
print("The r-squared value is: %f" % (r_value * r_value))
print("The p-value is: %f" % p_value)
The slope is: 25374.713601
The r-squared value is: 0.975179
The p-value is: 0.000000

Protein Orientation¶

Reinitialize Data¶

In [382]:
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')

# Create selections for groups that may be used for analysis
selection = "(not resname POPC) and " + \
            "(not resname POPS) and " + \
            "(not resname POP2) and " + \
            "(not resname CHOL)" 
protein    = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
selection = "resname POPC or resname POPS or " + \
            "resname POP2 or resname CHOL"
bilayer = u.select_atoms(selection)

Generate Selections for Individual A2s¶

In [383]:
# Proteins to select for contact analysis
#              A  B  C  D  E  F  G  H  I  J
protein_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
id_sele = {}
for pid in protein_ids:
    id_sele[pid] = np.logical_and((12785 + (pid * 308)) <= protein.resids, 
                                  protein.resids <= (13092 + (pid * 308)))
id_sele
Out[383]:
{0: array([ True,  True,  True, ..., False, False, False]),
 1: array([False, False, False, ..., False, False, False]),
 2: array([False, False, False, ..., False, False, False]),
 3: array([False, False, False, ..., False, False, False]),
 4: array([False, False, False, ..., False, False, False]),
 5: array([False, False, False, ..., False, False, False]),
 6: array([False, False, False, ..., False, False, False]),
 7: array([False, False, False, ..., False, False, False]),
 8: array([False, False, False, ..., False, False, False]),
 9: array([False, False, False, ...,  True,  True,  True])}

TEST: Calculate Principal Axes¶

Process for Calculating Angle of Orientation

  1. Translate system such that the vesicle center of mass is at the origin.
  2. Define $v_{1}$ and $v_{3}$ using the principal_axes method of MDAnalysis.
  3. Define and normalize $v_{com}$ using the center of mass of the A2 subunit.
  4. Ensure $v_{1}$ and $v_{3}$ are oriented properly.
    1. $v_{1}$ should be closer to ASP-298-SC1 than ASP-123-SC1.
    2. $v_{3}$ should be closer to ARG-195-SC2 than ARG-77-SC2.
    3. NOTE: Add each $v$ to the A2 COM prior to checking distance.
    4. NOTE: Correct the vector by multiplying by -1.
  5. Align $v_{3}$ to the x-axis.
    1. NOTE: Perform same rotations on the other vectors.
  6. Rotate $v_{1}$ about the x-axis until it is aligned with the y-axis.
    1. NOTE: Perform the same rotation on the ${v}$
  7. Project $v_{com}$ onto the xy-plane by taking the x and y coordinates.
  8. Rotate $v_{com}$ about the z-axis until it is aligned with the y-axis.
  9. Calculate the directional angle between $v_{1}$ and the y-axis.
In [384]:
import geometry_analysis as ga

ii = 0
a2_selection = id_sele[ii]
# 1) Translate system such that vesicle is at the center
protein.atoms.positions = protein.atoms.positions - \
                          bilayer.center_of_mass()
# 2) Get the principal axes
axes = protein.atoms[a2_selection].principal_axes()
v1 = axes[0, :]
v3 = axes[2, :]
# 3) Define and normalize a vector using the A2 COM
vcom = protein.atoms[a2_selection].center_of_mass()
vcom = vcom / np.linalg.norm(vcom)
# 4) Check v1 and v3 orientation; correct, if needed.
a2_com = protein.atoms[a2_selection].center_of_mass()

# Get that atom locations
selection = "resnum " 
selection = selection + str(12785 + (ii * 308) + 298 - 31)
selection = selection + " and name SC1"
v1_atom_correct = u.select_atoms(selection).atoms.positions[0]

selection = "resnum " 
selection = selection + str(12785 + (ii * 308) + 123 - 31)
selection = selection + " and name SC1"
v1_atom_wrong = u.select_atoms(selection).atoms.positions[0]

selection = "resnum " 
selection = selection + str(12785 + (ii * 308) + 195 - 31)
selection = selection + " and name SC2"
v3_atom_correct = u.select_atoms(selection).atoms.positions[0]

selection = "resnum " 
selection = selection + str(12785 + (ii * 308) + 77 - 31)
selection = selection + " and name SC2"
v3_atom_wrong = u.select_atoms(selection).atoms.positions[0]

# Compare atom distances to determine if orientation is correct.
correct_distance = np.linalg.norm((v1 + a2_com) - v1_atom_correct)
wrong_distance = np.linalg.norm((v1 + a2_com) - v1_atom_wrong)
if correct_distance < wrong_distance:
    pass
else:
    v1 = v1 * -1.
    
correct_distance = np.linalg.norm((v3 + a2_com) - v3_atom_correct)
wrong_distance = np.linalg.norm((v3 + a2_com) - v3_atom_wrong)
if correct_distance < wrong_distance:
    pass
else:
    v3 = v3 * -1.
    
# 5) Align v3 to the x-axis
rot_mat = ga.rotation_matrix_from_vectors(v3, np.array([1., 0., 0.]))
v3 = rot_mat.dot(v3)
v1 = rot_mat.dot(v1)
rot_mat = rot_mat.dot(vcom)

# 6) Align v1 with the y-axis
#angle = ga.directional_angle(v3, [1., 1., 1.], 
#                             [0., 0., 0.])
rot_mat = ga.rotation_matrix_from_vectors(v1, np.array([0., 1., 0.]))
v3 = rot_mat.dot(v3)
v1 = rot_mat.dot(v1)
rot_mat = rot_mat.dot(vcom)

# 7) Project vcom onto the xy plane
vcom = np.array([vcom[0], vcom[1], 0.])

# 8) Align vcom with the y-axis
rot_mat = ga.rotation_matrix_from_vectors(vcom, np.array([0., 1., 0.]))
v1 = rot_mat.dot(v1)
rot_mat = rot_mat.dot(vcom)

# 9) Calculate directional angle between v1 and the y-axis
#ga.directional_angle(v1, [0., 1., 0.], [0., 0., 1.])
ga.directional_angle([0., 1., 0.], v1, [0., 0., 1.])
Out[384]:
1.4232829753661784
In [388]:
import geometry_analysis as ga

# Collection of all sampled angles
angles = []
# Collection of angles vs time for all proteins
individual_angles = {}

# All monomers are associated by this point
lower_cut = 80000
upper_cut = 110000
for key in id_sele:
    a2_selection = id_sele[key]
    
    individual_angles[key] = []
    #for ts in u.trajectory[lower_cut:upper_cut:100]:
    for ts in u.trajectory[80000::100]:
        # 1) Translate system such that vesicle is at the center
        protein.atoms.positions = protein.atoms.positions - \
                                  bilayer.center_of_mass()
        # 2) Get the principal axes
        axes = protein.atoms[a2_selection].principal_axes()
        v1 = axes[0, :]
        v3 = axes[2, :]
        # 3) Define and normalize a vector using the A2 COM
        vcom = protein.atoms[a2_selection].center_of_mass()
        vcom = vcom / np.linalg.norm(vcom)
        # 4) Check v1 and v3 orientation; correct, if needed.
        a2_com = protein.atoms[a2_selection].center_of_mass()
        
        # Get that atom locations
        selection = "resnum " 
        selection = selection + str(12785 + (ii * 308) + 298 - 31)
        selection = selection + " and name SC1"
        v1_atom_correct = u.select_atoms(selection).atoms.positions[0]
        
        selection = "resnum " 
        selection = selection + str(12785 + (ii * 308) + 123 - 31)
        selection = selection + " and name SC1"
        v1_atom_wrong = u.select_atoms(selection).atoms.positions[0]
        
        selection = "resnum " 
        selection = selection + str(12785 + (ii * 308) + 195 - 31)
        selection = selection + " and name SC2"
        v3_atom_correct = u.select_atoms(selection).atoms.positions[0]
        
        selection = "resnum " 
        selection = selection + str(12785 + (ii * 308) + 77 - 31)
        selection = selection + " and name SC2"
        v3_atom_wrong = u.select_atoms(selection).atoms.positions[0]
        
        # Compare atom distances to determine if orientation is correct.
        correct_distance = np.linalg.norm((v1 + a2_com) - v1_atom_correct)
        wrong_distance = np.linalg.norm((v1 + a2_com) - v1_atom_wrong)
        if correct_distance < wrong_distance:
            pass
        else:
            v1 = v1 * -1.
            
        correct_distance = np.linalg.norm((v3 + a2_com) - v3_atom_correct)
        wrong_distance = np.linalg.norm((v3 + a2_com) - v3_atom_wrong)
        if correct_distance < wrong_distance:
            pass
        else:
            v3 = v3 * -1.
            
        # 5) Align v3 to the x-axis
        rot_mat = ga.rotation_matrix_from_vectors(v3, np.array([1., 0., 0.]))
        v3 = rot_mat.dot(v3)
        v1 = rot_mat.dot(v1)
        rot_mat = rot_mat.dot(vcom)
        
        # 6) Align v1 with the y-axis
        #angle = ga.directional_angle(v3, [1., 1., 1.], 
        #                             [0., 0., 0.])
        rot_mat = ga.rotation_matrix_from_vectors(v1, np.array([0., 1., 0.]))
        v3 = rot_mat.dot(v3)
        v1 = rot_mat.dot(v1)
        rot_mat = rot_mat.dot(vcom)
        
        # 7) Project vcom onto the xy plane
        vcom = np.array([vcom[0], vcom[1], 0.])
        
        # 8) Align vcom with the y-axis
        rot_mat = ga.rotation_matrix_from_vectors(vcom, np.array([0., 1., 0.]))
        v1 = rot_mat.dot(v1)
        rot_mat = rot_mat.dot(vcom)
        
        # 9) Calculate directional angle between v1 and the y-axis
        #ga.directional_angle(v1, [0., 1., 0.], [0., 0., 1.])
        angles.append(ga.directional_angle([0., 1., 0.], v1, [0., 0., 1.]))
        
        individual_angles[key].append(angles[-1])
angles = np.array(angles)
#angles = angles * (180. / np.pi)
In [386]:
np.seterr(all = "ignore")

fig, axs = plt.subplots(10, 1, figsize=(25, 40))
for ii in range(len(individual_angles)):
    angles = np.array(individual_angles[ii]) * 180. / np.pi
    axs[ii].set_ylim(0, 360)
    axs[ii].plot(full_time[80000::100], angles, label = "POPC")
    axs[ii].set_title(str(ii))
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(handles, labels, loc = "center right")
plt.show()
No description has been provided for this image
In [390]:
N = 80
bottom = 8
max_height = 4

fig, ax = plt.subplots(figsize=(25, 40))

hist_data = np.histogram(angles, range = [0., np.pi * 2.], bins = 80)

theta = hist_data[1]
theta = (theta[1:] + theta[:-1]) / 2.

counts = hist_data[0]
radii = (counts / np.max(counts)) * max_height
width = (2*np.pi) / N

ax = plt.subplot(111, polar=True)
bars = ax.bar(theta, radii, width=width, bottom=bottom)
#ax.xaxis("off")
#ax.axes.get_yaxis().set_visible(False)
ax.axes.get_yaxis().set_ticks([8., 9., 10., 11.])
ax.axes.yaxis.set_ticklabels([])
ax.tick_params(axis='x', labelrotation=270)

# Use custom colors and opacity
for r, bar in zip(radii, bars):
    bar.set_facecolor(plt.cm.viridis(r / 10.))
    bar.set_alpha(0.8)

plt.show()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [390], in <cell line: 16>()
     13 radii = (counts / np.max(counts)) * max_height
     14 width = (2*np.pi) / N
---> 16 ax = plt.subplot(111, polar=True, figure = fig)
     17 bars = ax.bar(theta, radii, width=width, bottom=bottom)
     18 #ax.xaxis("off")
     19 #ax.axes.get_yaxis().set_visible(False)

File ~/apps/miniconda3/envs/tutorial/lib/python3.10/site-packages/matplotlib/pyplot.py:1283, in subplot(*args, **kwargs)
   1280             break
   1281 else:
   1282     # we have exhausted the known Axes and none match, make a new one!
-> 1283     ax = fig.add_subplot(*args, **kwargs)
   1285 fig.sca(ax)
   1287 bbox = ax.bbox

File ~/apps/miniconda3/envs/tutorial/lib/python3.10/site-packages/matplotlib/figure.py:752, in FigureBase.add_subplot(self, *args, **kwargs)
    648 """
    649 Add an `~.axes.Axes` to the figure as part of a subplot arrangement.
    650 
   (...)
    747     fig.add_subplot(ax1)  # add ax1 back to the figure
    748 """
    749 if 'figure' in kwargs:
    750     # Axes itself allows for a 'figure' kwarg, but since we want to
    751     # bind the created Axes to self, it is not allowed here.
--> 752     raise TypeError(
    753         "add_subplot() got an unexpected keyword argument 'figure'")
    755 if len(args) == 1 and isinstance(args[0], SubplotBase):
    756     ax = args[0]

TypeError: add_subplot() got an unexpected keyword argument 'figure'
No description has been provided for this image
In [394]:
N = 80
bottom = 8
max_height = 4

fig, ax = plt.subplots(subplot_kw={'projection': 'polar'}, figsize=(15, 15))

hist_data = np.histogram(angles, range = [0., np.pi * 2.], bins = 80)

theta = hist_data[1]
theta = (theta[1:] + theta[:-1]) / 2.

counts = hist_data[0]
radii = (counts / np.max(counts)) * max_height
width = (2*np.pi) / N

#ax = plt.subplot(111, polar=True)
bars = ax.bar(theta, radii, width=width, bottom=bottom)
#ax.xaxis("off")
#ax.axes.get_yaxis().set_visible(False)
ax.axes.get_yaxis().set_ticks([8., 9., 10., 11.])
ax.axes.yaxis.set_ticklabels([])
ax.tick_params(axis='x', labelrotation=270)

# Use custom colors and opacity
for r, bar in zip(radii, bars):
    bar.set_facecolor(plt.cm.viridis(r / 10.))
    bar.set_alpha(0.8)

plt.show()
No description has been provided for this image

Lipid Distance Matrix¶

6.1) Reinitialize Data¶

In [17]:
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')


# Create selections for groups that may be used for analysis
selection = "(resname POPC and name PO4 NC3) or "            + \
            "(resname POPS and name PO4 CNO) or "            + \
            "(resname POP2 and name P1 P2 C1 C2 C3 PO4) or " + \
            "(resname CHOL)" 
head_group = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                         "resname POP2 or resname CHOL")

6.2) Collect Lipid Head Center of Mass Data¶

In [18]:
lipid_head_com = {}
#for lip in ["POPC", "POPS", "POP2", "CHOL"]:
#for lip in ["POPC"]:
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    # Generate selection and np.array to store data.
    lipid_sele = head_group.select_atoms("resname " + lip)
    lipid_head_com[lip] = np.empty
    lipid_head_com[lip] = np.empty((int(u.trajectory.n_frames / frame_skip) + 1, 
                              lipid_sele.n_residues, 
                              3))
    ii = 0
    for ts in u.trajectory[::frame_skip]:
        # Check progress
        if ii % track_divisor == 0: 
            print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        lipid_sele.positions  = lipid_sele.positions - bilayer.center_of_mass()
        lipid_head_com[lip][ii, :] = lipid_sele.center_of_mass(compound = 'residues')
        ii += 1
    print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%  

6.3) Get Upper Leaflet Head Group COMs¶

In [19]:
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    head_com = lipid_head_com[lip]
    
    # Get the magnitude of the COM vectors
    dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
    # Use the average magnitude to seperate the distributions
    upper_leaf_min = np.mean(dist_from_center)
    upper_selection = dist_from_center > upper_leaf_min
    dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
    upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
    
    # Define the upper-leaflet lipids
    lipid_head_com[lip] = head_com[:, upper_selection, :]

6.4) Function to Cluster a Matrix Based on Row/Colum Similarity¶

In [ ]:
import scipy
import scipy.cluster.hierarchy as sch

def cluster_corr(corr_array, inplace=False):
    """
    Rearranges the correlation matrix, corr_array, so that groups of highly 
    correlated variables are next to eachother 
    
    Parameters
    ----------
    corr_array : pandas.DataFrame or numpy.ndarray
        a NxN correlation matrix 
        
    Returns
    -------
    pandas.DataFrame or numpy.ndarray
        a NxN correlation matrix with the columns and rows rearranged
    """
    pairwise_distances = sch.distance.pdist(corr_array)
    linkage = sch.linkage(pairwise_distances, method='complete')
    cluster_distance_threshold = pairwise_distances.max()/2
    idx_to_cluster_array = sch.fcluster(linkage, cluster_distance_threshold, 
                                        criterion='distance')
    idx = np.argsort(idx_to_cluster_array)
    
    if not inplace:
        corr_array = corr_array.copy()
    
    if isinstance(corr_array, pd.DataFrame):
        return corr_array.iloc[idx, :].T.iloc[idx, :]
    return corr_array[idx, :][:, idx]

6.5) Calculate and Plot Distance Matrices for First and Last Frames¶

In [213]:
# Select lipid to perform calculation for
lip = "POP2"

# Generate plot
fig, axs = plt.subplots(1, 2, figsize = (12, 5))
jj = 0
for ii in [0, -1]:
    '''
    Create indicies for selecting all combinations of lipids
    '''
    axis_1 = np.linspace(0, 582 - 1, 582, dtype = int)
    axis_2 = np.linspace(0, 582 - 1, 582, dtype = int)
    index_1, index_2 = np.array(np.meshgrid(axis_1, axis_2))
    
    x = index_1.flatten()
    y = index_2.flatten()

    '''
    Calculate the arclengths between all lipids
    '''
    # Collect the lipid coordinates for the frame
    frame = lipid_head_com[lip][ii, :, :]
    
    # Calculate the arc-length between all all lipids
    angles = np.sum(frame[x, :] * frame[y, :], axis = 1)
    angles = angles /                                 \
             (np.linalg.norm(frame[x, :], axis = 1) * \
              np.linalg.norm(frame[y, :], axis = 1))
    angles = np.arccos(angles)
    arclengths = angles * average_upper_radius
    
    '''
    Reshape arclengths into matrix and generate grid
    coordinated for plotting
    '''
    z = arclengths.reshape(582, 582)
    z = np.nan_to_num(z)
    x = x.reshape(582, 582)
    y = y.reshape(582, 582)
    
    # Cluster matrix based on similarity
    z = cluster_corr(z)
    
    '''
    Plot Results
    '''
    axs[jj].contourf(x, y, z, levels = [0, 50.], cmap = "viridis")
    jj += 1
plt.show()
/tmp/ipykernel_7545/3285562180.py:29: RuntimeWarning: invalid value encountered in arccos
  angles = np.arccos(angles)
No description has been provided for this image

Lipid Contacts¶

Reinitialize the Data¶

In [35]:
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')

# Create selections for groups that may be used for analysis
selection = "(not resname POPC) and " + \
            "(not resname POPS) and " + \
            "(not resname POP2) and " + \
            "(not resname CHOL)" 
protein    = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
selection = "resname POPC or resname POPS or " + \
            "resname POP2 or resname CHOL"
bilayer = u.select_atoms(selection)

Define Functions¶

  1. Generate the pairwise combinations of a given list-like.
  2. Defines a ufunc to convert an array of 3-letter amino acid codes to an array of 1-letter codes.
In [36]:
def numpy_pairwise_combinations(x):
    idx = np.stack(np.triu_indices(len(x), k = 0), axis=-1)
    return x[idx]

convert_aa = np.frompyfunc(mda.lib.util.convert_aa_code, 1, 1)

Calculate Pairwise Distances¶

In [244]:
%%time
# Used to collect all of the information about each 
# contact
contact_data = {"Distance" : [], "Partner_1" : [], 
                "Partner_2" : [], "Contact" : []}
avg_contacts = []
ii = 0
for ts in u.trajectory[::100]:
    test_data = lipid_head_com["POP2"][ii, :, :]
    test_data 
    search_results = mda.lib.distances.self_capped_distance(test_data, 20.0)
    counts = np.unique(search_results[0].flatten(), return_counts = True)[1]
    counts = np.concatenate([np.zeros(582 - len(counts)), counts])
    
    avg_contacts.append(np.mean(counts))
    
    ii += 1
CPU times: user 12.9 s, sys: 115 ms, total: 13 s
Wall time: 13 s
In [253]:
# Initialize plotting.
fig, ax = plt.subplots(figsize = (15, 4))

# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
# ---
ax.plot(time, avg_contacts) # Plot data
# ---
ax.set_xlabel("Time (us)")  # Set x label
ax.set_ylabel("Avg Pairwise Dist (Ã…)") # Set y label

# Calculate equation for trendline
z = np.polyfit(time, avg_contacts, 1)
p = np.poly1d(z)

slope, intercept, r_value, p_value, std_err = \
    scipy.stats.linregress(time, avg_contacts)

# Add trendline to plot
ax.plot(time, p(time), linestyle = "dashed",color = "orange")

#ax.annotate("R² = " + str(round(r_value, 2)), xy = (0., 3.), 
#            xytext = (0., 3.))

plt.show()
No description has been provided for this image